[ tor-browser ].git.dasho

commit e853f0faa557747ffc0c375d086489ee6bcadd24
parent f0ae0ea567b04003bd1efcf59840618a40d301b4
Author: Randy Concepcion <rconcepcion@mozilla.com>
Date:   Tue, 30 Dec 2025 15:24:07 +0000

Bug 2005754 - Convert SecurityOrchestrator to singleton with multi-session support r=ai-ondevice-reviewers,gregtatum

Convert SecurityOrchestrator from per-window instances to a shared singleton
with session-keyed ledgers. This enables proper security-context isolation when
multiple AI Windows are active.

- Add getInstance() / getSecurityOrchestrator() as singleton entry point
- Add registerSession() / cleanupSession() for AI Window lifecycle
- Update evaluate() and getSessionLedger() to require sessionId
- Fail closed for unknown sessions (UNKNOWN_SESSION)
- Add resetForTesting() for test isolation

Depends on D274680

Differential Revision: https://phabricator.services.mozilla.com/D276793

Diffstat:
M toolkit/components/ml/security/SecurityOrchestrator.sys.mjs  | 254 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
M toolkit/components/ml/tests/xpcshell/test_condition_evaluator.js  | 75 +++++++++++++++++++++++++++++++++++++++++++--------------------------------
M toolkit/components/ml/tests/xpcshell/test_json_policy_system.js  | 120 +++++++++++++++++++++++++++++++++++++++++++++----------------------------------
M toolkit/components/ml/tests/xpcshell/test_policy_evaluator.js  | 84 +++++++++++++++++++++++++++++++++++++++++++++----------------------------------
M toolkit/components/ml/tests/xpcshell/test_security_orchestrator.js  | 320 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------

5 files changed, 604 insertions(+), 249 deletions(-)
diff --git a/toolkit/components/ml/security/SecurityOrchestrator.sys.mjs b/toolkit/components/ml/security/SecurityOrchestrator.sys.mjs
@@ -41,7 +41,9 @@ function isSecurityEnabled() {
 
 /**
  * Central security orchestrator for Firefox AI features.
- * Each AI Window instance creates its own SecurityOrchestrator via create().
+ *
+ * This is a singleton service. Use getSecurityOrchestrator() to access the instance.
+ * The orchestrator is lazily initialized on first access and shared across all callers.
  *
  * ## Evaluation Flow
  *
@@ -75,48 +77,47 @@ function isSecurityEnabled() {
  */
 export class SecurityOrchestrator {
   /**
-   * Registry of security policies by phase.
+   * Singleton instance promise.
    *
-   * @type {Map<string, Array<object>>}
+   * @type {Promise<SecurityOrchestrator>|null}
    */
-  #policies = new Map();
+  static #instancePromise = null;
 
   /**
-   * Session ledger for URL tracking across tabs in this window.
+   * Registry of security policies by phase.
    *
-   * @type {lazy.SessionLedger}
+   * @type {Map<string, Array<object>>}
    */
-  #sessionLedger;
+  #policies = new Map();
 
   /**
-   * Session identifier for this window.
+   * Session ledgers keyed by sessionId.
+   * Each AI Window session has its own isolated ledger.
    *
-   * @type {string}
+   * @type {Map<string, lazy.SessionLedger>}
    */
-  #sessionId;
+  #sessionLedgers = new Map();
 
   /**
-   * Used by create() to instantiate SecurityOrchestrator instance.
-   *
-   * @param {string} sessionId - Unique identifier for this session
+   * Do not call directly; use getSecurityOrchestrator() to get the singleton instance.
    */
-  constructor(sessionId) {
-    this.#sessionId = sessionId;
-    this.#sessionLedger = new lazy.SessionLedger(sessionId);
+  constructor() {
+    // Session ledgers are created via registerSession()
   }
 
   /**
-   * Creates and initializes a new SecurityOrchestrator instance.
+   * Creates and initializes the singleton SecurityOrchestrator instance.
+   * Called by getInstance() whenever no instance promise is cached.
    *
-   * @param {string} sessionId - Unique identifier for this session
    * @returns {Promise<SecurityOrchestrator>} Initialized orchestrator instance
+   * @private
    */
-  static async create(sessionId) {
-    const instance = new SecurityOrchestrator(sessionId);
+  static async #createInstance() {
+    const instance = new SecurityOrchestrator();
     await instance.#loadPolicies();
 
-    lazy.console.warn(
-      `[Security] Orchestrator initialized for session ${sessionId} with ${Array.from(
+    lazy.console.debug(
+      `[Security] Orchestrator singleton initialized with ${Array.from(
         instance.#policies.values()
       ).reduce((sum, policies) => sum + policies.length, 0)} policies`
     );
@@ -125,6 +126,103 @@ export class SecurityOrchestrator {
   }
 
   /**
+   * Gets the singleton SecurityOrchestrator instance.
+   *
+   * The orchestrator is lazily initialized on first call. Subsequent calls
+   * return the same instance. If initialization fails, the error is thrown
+   * and the next call will retry initialization.
+   *
+   * @returns {Promise<SecurityOrchestrator>} The singleton orchestrator instance
+   * @throws {Error} If policy loading or initialization fails
+   */
+  static async getInstance() {
+    if (!SecurityOrchestrator.#instancePromise) {
+      SecurityOrchestrator.#instancePromise =
+        SecurityOrchestrator.#createInstance().catch(error => {
+          // Reset so next call can retry
+          SecurityOrchestrator.#instancePromise = null;
+          lazy.console.error(
+            "[Security] Orchestrator initialization failed:",
+            error
+          );
+          throw error;
+        });
+    }
+    return SecurityOrchestrator.#instancePromise;
+  }
+
+  /**
+   * Registers a new session with its own isolated ledger.
+   * Called when an AI Window opens.
+   *
+   * This method is idempotent - calling it multiple times with the same
+   * sessionId will not create duplicate ledgers.
+   *
+   * @param {string} sessionId - Unique identifier for the session
+   */
+  registerSession(sessionId) {
+    if (!sessionId || typeof sessionId !== "string") {
+      throw new TypeError(
+        "registerSession requires a non-empty string sessionId"
+      );
+    }
+    if (this.#sessionLedgers.has(sessionId)) {
+      lazy.console.debug(`[Security] Session ${sessionId} already registered`);
+      return;
+    }
+    this.#sessionLedgers.set(sessionId, new lazy.SessionLedger(sessionId));
+    lazy.console.debug(`[Security] Registered session ${sessionId}`);
+  }
+
+  /**
+   * Cleans up a session and removes its ledger.
+   * Called when an AI Window closes.
+   *
+   * This method is idempotent - calling it with a non-existent sessionId
+   * will not throw an error.
+   *
+   * @param {string} sessionId - Unique identifier for the session
+   */
+  cleanupSession(sessionId) {
+    const deleted = this.#sessionLedgers.delete(sessionId);
+    if (deleted) {
+      lazy.console.debug(`[Security] Cleaned up session ${sessionId}`);
+    }
+  }
+
+  /**
+   * Clears internal state for testing purposes.
+   * Called by resetForTesting() to clean up instance data.
+   */
+  clearForTesting() {
+    this.#sessionLedgers.clear();
+  }
+
+  /**
+   * Resets the orchestrator state for testing purposes.
+   * Only available in automation (tests).
+   *
+   * This clears all session ledgers and resets the singleton instance,
+   * allowing tests to start with a clean state.
+   *
+   * @returns {Promise<void>}
+   * @throws {Error} If called outside of automation
+   */
+  static async resetForTesting() {
+    if (!Cu.isInAutomation) {
+      throw new Error("resetForTesting() only available in automation");
+    }
+
+    const instancePromise = SecurityOrchestrator.#instancePromise;
+    if (instancePromise) {
+      await instancePromise.then(instance => instance.clearForTesting());
+    }
+    SecurityOrchestrator.#instancePromise = null;
+
+    lazy.console.debug("[Security] Orchestrator reset for testing");
+  }
+
+  /**
    * Loads and validates policies from JSON files.
    *
    * @private
@@ -179,12 +277,13 @@ export class SecurityOrchestrator {
   }
 
   /**
-   * Gets the session ledger for this orchestrator.
+   * Gets the session ledger for a specific session.
    *
-   * @returns {lazy.SessionLedger} The session ledger
+   * @param {string} sessionId - The session identifier
+   * @returns {lazy.SessionLedger|undefined} The session ledger, or undefined if not found
    */
-  getSessionLedger() {
-    return this.#sessionLedger;
+  getSessionLedger(sessionId) {
+    return this.#sessionLedgers.get(sessionId);
   }
 
   /**
@@ -196,7 +295,7 @@ export class SecurityOrchestrator {
    *
    * @example
    * // AI Window dispatching a tool call:
-   * const decision = await orchestrator.evaluate({
+   * const decision = await orchestrator.evaluate("session-123", {
    *   phase: "tool.execution",
    *   action: {
    *     type: "tool.call",
@@ -212,35 +311,40 @@ export class SecurityOrchestrator {
    * });
    * // Returns: { effect: "allow" } or { effect: "deny", code: "UNSEEN_LINK", ... }
    *
+   * @param {string} sessionId - The session identifier
    * @param {object} envelope - Security check request
    * @param {string} envelope.phase - Security phase ("tool.execution", etc.)
    * @param {object} envelope.action - Action being checked (type, tool, urls, etc.)
    * @param {object} envelope.context - Request context (tabId, requestId, etc.)
    * @returns {Promise<object>} Decision object with effect (allow/deny), code, reason
+   * @throws {Error} If session is not registered or envelope is invalid
    */
-  async evaluate(envelope) {
+  async evaluate(sessionId, envelope) {
     const startTime = ChromeUtils.now();
 
-    try {
-      if (!envelope || typeof envelope !== "object") {
-        return lazy.createDenyDecision(
-          "INVALID_REQUEST",
-          "Security envelope is null or invalid"
-        );
-      }
+    // Check for valid session first
+    const sessionLedger = this.#sessionLedgers.get(sessionId);
+    if (!sessionLedger) {
+      throw new Error(`Session ${sessionId} is not registered`);
+    }
 
-      const { phase, action, context } = envelope;
-      if (!phase || !action || !context) {
-        return lazy.createDenyDecision(
-          "INVALID_REQUEST",
-          "Security envelope missing required fields (phase, action, or context)"
-        );
-      }
+    if (!envelope || typeof envelope !== "object") {
+      throw new Error("Security envelope is null or invalid");
+    }
+
+    const { phase, action, context } = envelope;
+    if (!phase || !action || !context) {
+      throw new Error(
+        "Security envelope missing required fields (phase, action, or context)"
+      );
+    }
 
+    const requestId = context.requestId;
+    try {
       if (!isSecurityEnabled()) {
         lazy.logSecurityEvent({
-          requestId: context.requestId,
-          sessionId: this.#sessionId,
+          requestId,
+          sessionId,
           phase,
           action,
           context: {
@@ -263,8 +367,8 @@ export class SecurityOrchestrator {
           reason: "No policies for phase",
         });
         lazy.logSecurityEvent({
-          requestId: context.requestId,
-          sessionId: this.#sessionId,
+          requestId,
+          sessionId,
           phase,
           action,
           context: {
@@ -279,14 +383,14 @@ export class SecurityOrchestrator {
 
       const fullContext = {
         ...context,
-        sessionLedger: this.#sessionLedger,
-        sessionId: this.#sessionId,
+        sessionLedger,
+        sessionId,
         timestamp: ChromeUtils.now(),
       };
 
       const { currentTabId, mentionedTabIds = [] } = context;
       const tabsToCheck = [currentTabId, ...mentionedTabIds];
-      const linkLedger = this.#sessionLedger.merge(tabsToCheck);
+      const linkLedger = sessionLedger.merge(tabsToCheck);
       fullContext.linkLedger = linkLedger;
 
       const decision = lazy.evaluatePhasePolicies(
@@ -296,8 +400,8 @@ export class SecurityOrchestrator {
       );
 
       lazy.logSecurityEvent({
-        requestId: context.requestId,
-        sessionId: this.#sessionId,
+        requestId,
+        sessionId,
         phase,
         action,
         context: {
@@ -317,12 +421,12 @@ export class SecurityOrchestrator {
       );
 
       lazy.logSecurityEvent({
-        requestId: envelope?.context?.requestId,
-        sessionId: this.#sessionId,
-        phase: envelope?.phase || "unknown",
-        action: envelope?.action || {},
+        requestId,
+        sessionId,
+        phase,
+        action,
         context: {
-          tainted: envelope?.context?.tainted ?? false,
+          tainted: context.tainted ?? false,
           trustedCount: 0,
         },
         decision: errorDecision,
@@ -366,21 +470,37 @@ export class SecurityOrchestrator {
       };
     }
 
+    const sessionStats = {};
+    for (const [sessionId, ledger] of this.#sessionLedgers.entries()) {
+      sessionStats[sessionId] = {
+        tabCount: ledger.tabCount(),
+        totalUrls: Array.from(ledger.tabs.values()).reduce(
+          (sum, tabLedger) => sum + tabLedger.size(),
+          0
+        ),
+      };
+    }
+
     return {
-      sessionId: this.#sessionId,
-      initialized: this.#sessionLedger !== null,
+      initialized: this.#policies.size > 0,
       registeredPhases: Array.from(this.#policies.keys()),
       totalPolicies,
       policyBreakdown,
-      sessionLedgerStats: this.#sessionLedger
-        ? {
-            tabCount: this.#sessionLedger.tabCount(),
-            totalUrls: Array.from(this.#sessionLedger.tabs.values()).reduce(
-              (sum, ledger) => sum + ledger.size(),
-              0
-            ),
-          }
-        : null,
+      sessionCount: this.#sessionLedgers.size,
+      sessionStats,
     };
   }
 }
+
+/**
+ * Gets the singleton SecurityOrchestrator instance.
+ *
+ * This is the preferred way to access the SecurityOrchestrator.
+ * The orchestrator is lazily initialized on first call.
+ *
+ * @returns {Promise<SecurityOrchestrator>} The singleton orchestrator instance
+ * @throws {Error} If policy loading or initialization fails
+ */
+export async function getSecurityOrchestrator() {
+  return SecurityOrchestrator.getInstance();
+}
diff --git a/toolkit/components/ml/tests/xpcshell/test_condition_evaluator.js b/toolkit/components/ml/tests/xpcshell/test_condition_evaluator.js
@@ -12,12 +12,15 @@
  * Focus: Testing condition evaluation behavior through policy execution
  */
 
-const { SecurityOrchestrator } = ChromeUtils.importESModule(
-  "chrome://global/content/ml/security/SecurityOrchestrator.sys.mjs"
-);
+const { SecurityOrchestrator, getSecurityOrchestrator } =
+  ChromeUtils.importESModule(
+    "chrome://global/content/ml/security/SecurityOrchestrator.sys.mjs"
+  );
 
 const PREF_SECURITY_ENABLED = "browser.ml.security.enabled";
 
+const TEST_SESSION_ID = "test-session";
+
 /** @type {SecurityOrchestrator|null} */
 let orchestrator = null;
 
@@ -25,8 +28,9 @@ function setup() {
   Services.prefs.clearUserPref(PREF_SECURITY_ENABLED);
 }
 
-function teardown() {
+async function teardown() {
   Services.prefs.clearUserPref(PREF_SECURITY_ENABLED);
+  await SecurityOrchestrator.resetForTesting();
   orchestrator = null;
 }
 
@@ -42,13 +46,14 @@ function teardown() {
 add_task(async function test_condition_passes_when_all_urls_in_ledger() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   const tabLedger = ledger.forTab("tab-1");
   tabLedger.add("https://example.com");
   tabLedger.add("https://mozilla.org");
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -69,7 +74,7 @@ add_task(async function test_condition_passes_when_all_urls_in_ledger() {
     "Should allow when all URLs in ledger (condition passes)"
   );
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -83,11 +88,12 @@ add_task(async function test_condition_passes_when_all_urls_in_ledger() {
 add_task(async function test_condition_fails_when_url_missing_from_ledger() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1").add("https://example.com");
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -109,7 +115,7 @@ add_task(async function test_condition_fails_when_url_missing_from_ledger() {
   );
   Assert.equal(decision.code, "UNSEEN_LINK");
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -123,11 +129,12 @@ add_task(async function test_condition_fails_when_url_missing_from_ledger() {
 add_task(async function test_condition_passes_with_empty_urls_array() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1");
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -148,7 +155,7 @@ add_task(async function test_condition_passes_with_empty_urls_array() {
     "Should allow with empty URLs (nothing to check)"
   );
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -162,11 +169,12 @@ add_task(async function test_condition_passes_with_empty_urls_array() {
 add_task(async function test_condition_fails_with_malformed_url() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1");
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -190,7 +198,7 @@ add_task(async function test_condition_fails_with_malformed_url() {
   // caught as specifically malformed at this layer
   Assert.equal(decision.code, "UNSEEN_LINK");
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -204,11 +212,12 @@ add_task(async function test_condition_fails_with_malformed_url() {
 add_task(async function test_condition_checks_current_tab_only() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1").add("https://example.com");
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -229,7 +238,7 @@ add_task(async function test_condition_checks_current_tab_only() {
     "Should check current tab ledger only"
   );
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -244,13 +253,14 @@ add_task(async function test_condition_checks_current_tab_only() {
 add_task(async function test_condition_merges_mentioned_tabs() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
 
   ledger.forTab("tab-1").add("https://example.com");
   ledger.forTab("tab-2").add("https://mozilla.org");
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -271,7 +281,7 @@ add_task(async function test_condition_merges_mentioned_tabs() {
     "Should merge current tab + @mentioned tabs"
   );
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -286,11 +296,12 @@ add_task(async function test_condition_merges_mentioned_tabs() {
 add_task(async function test_condition_normalizes_urls() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1").add("https://example.com/page"); // No fragment
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -311,5 +322,5 @@ add_task(async function test_condition_normalizes_urls() {
     "Should allow after normalizing URLs (fragments stripped)"
   );
 
-  teardown();
+  await teardown();
 });
diff --git a/toolkit/components/ml/tests/xpcshell/test_json_policy_system.js b/toolkit/components/ml/tests/xpcshell/test_json_policy_system.js
@@ -12,14 +12,17 @@
  * - @Mentions support
  */
 
-const { SecurityOrchestrator } = ChromeUtils.importESModule(
-  "chrome://global/content/ml/security/SecurityOrchestrator.sys.mjs"
-);
+const { SecurityOrchestrator, getSecurityOrchestrator } =
+  ChromeUtils.importESModule(
+    "chrome://global/content/ml/security/SecurityOrchestrator.sys.mjs"
+  );
 
 const PREF_SECURITY_ENABLED = "browser.ml.security.enabled";
 const POLICY_JSON_URL =
   "chrome://global/content/ml/security/policies/tool-execution-policies.json";
 
+const TEST_SESSION_ID = "test-session";
+
 /** @type {SecurityOrchestrator|null} */
 let orchestrator = null;
 
@@ -27,8 +30,9 @@ function setup() {
   Services.prefs.clearUserPref(PREF_SECURITY_ENABLED);
 }
 
-function teardown() {
+async function teardown() {
   Services.prefs.clearUserPref(PREF_SECURITY_ENABLED);
+  await SecurityOrchestrator.resetForTesting();
   orchestrator = null;
 }
 
@@ -59,7 +63,7 @@ add_task(async function test_json_policy_file_loads_and_validates() {
   Assert.ok(policy.phase, "Policy should have phase");
   Assert.ok(policy.effect, "Policy should have effect");
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -73,16 +77,20 @@ add_task(async function test_json_policy_file_loads_and_validates() {
 add_task(async function test_orchestrator_initializes_with_policies() {
   setup();
 
-  // If create succeeds, policies loaded correctly
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  // If getSecurityOrchestrator succeeds, policies loaded correctly
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
 
   Assert.ok(ledger, "Should initialize successfully");
-  Assert.ok(orchestrator.getSessionLedger(), "Should have session ledger");
+  Assert.ok(
+    orchestrator.getSessionLedger(TEST_SESSION_ID),
+    "Should have session ledger"
+  );
 
   // Verify policies work by testing actual evaluation
   ledger.forTab("tab-1");
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -108,7 +116,7 @@ add_task(async function test_orchestrator_initializes_with_policies() {
     "Should use JSON policy"
   );
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -122,11 +130,12 @@ add_task(async function test_orchestrator_initializes_with_policies() {
 add_task(async function test_e2e_deny_unseen_link() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1"); // Empty ledger
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -157,7 +166,7 @@ add_task(async function test_e2e_deny_unseen_link() {
     "Should be from block-unseen-links policy"
   );
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -171,12 +180,13 @@ add_task(async function test_e2e_deny_unseen_link() {
 add_task(async function test_e2e_deny_if_any_url_unseen() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   const tabLedger = ledger.forTab("tab-1");
   tabLedger.add("https://example.com");
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -201,7 +211,7 @@ add_task(async function test_e2e_deny_if_any_url_unseen() {
   );
   Assert.equal(decision.code, "UNSEEN_LINK");
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -215,11 +225,12 @@ add_task(async function test_e2e_deny_if_any_url_unseen() {
 add_task(async function test_e2e_deny_malformed_url() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1");
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -243,7 +254,7 @@ add_task(async function test_e2e_deny_malformed_url() {
   // caught as specifically malformed
   Assert.equal(decision.code, "UNSEEN_LINK");
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -257,12 +268,13 @@ add_task(async function test_e2e_deny_malformed_url() {
 add_task(async function test_e2e_allow_seeded_url() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   const tabLedger = ledger.forTab("tab-1");
   tabLedger.add("https://example.com");
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -283,7 +295,7 @@ add_task(async function test_e2e_allow_seeded_url() {
     "CRITICAL: Should allow seeded URL (real policy from JSON)"
   );
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -297,13 +309,14 @@ add_task(async function test_e2e_allow_seeded_url() {
 add_task(async function test_e2e_allow_multiple_seeded_urls() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   const tabLedger = ledger.forTab("tab-1");
   tabLedger.add("https://example.com");
   tabLedger.add("https://mozilla.org");
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -320,7 +333,7 @@ add_task(async function test_e2e_allow_multiple_seeded_urls() {
 
   Assert.equal(decision.effect, "allow", "Should allow when all URLs seeded");
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -333,11 +346,12 @@ add_task(async function test_e2e_allow_multiple_seeded_urls() {
 add_task(async function test_e2e_allow_empty_urls() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1");
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -354,7 +368,7 @@ add_task(async function test_e2e_allow_empty_urls() {
 
   Assert.equal(decision.effect, "allow", "Should allow when no URLs to check");
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -368,8 +382,9 @@ add_task(async function test_e2e_allow_empty_urls() {
 add_task(async function test_e2e_allow_url_from_mentioned_tab() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
 
   // Current tab
   ledger.forTab("tab-1").add("https://example.com");
@@ -377,7 +392,7 @@ add_task(async function test_e2e_allow_url_from_mentioned_tab() {
   // Mentioned tab (different URL)
   ledger.forTab("tab-2").add("https://mozilla.org");
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -398,7 +413,7 @@ add_task(async function test_e2e_allow_url_from_mentioned_tab() {
     "Should allow URL from @mentioned tab (merged ledger)"
   );
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -412,13 +427,14 @@ add_task(async function test_e2e_allow_url_from_mentioned_tab() {
 add_task(async function test_e2e_deny_url_not_in_mentioned_tabs() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
 
   ledger.forTab("tab-1").add("https://example.com");
   ledger.forTab("tab-2").add("https://mozilla.org");
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -439,7 +455,7 @@ add_task(async function test_e2e_deny_url_not_in_mentioned_tabs() {
     "Should deny URL not in current or @mentioned tabs"
   );
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -453,11 +469,12 @@ add_task(async function test_e2e_deny_url_not_in_mentioned_tabs() {
 add_task(async function test_e2e_url_normalization_strips_fragments() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1").add("https://example.com/page"); // No fragment
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -478,7 +495,7 @@ add_task(async function test_e2e_url_normalization_strips_fragments() {
     "Should allow after normalizing (fragments stripped)"
   );
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -495,11 +512,12 @@ add_task(async function test_e2e_pref_switch_bypasses_policies() {
   // Disable security
   Services.prefs.setBoolPref(PREF_SECURITY_ENABLED, false);
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1"); // Empty ledger
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -520,5 +538,5 @@ add_task(async function test_e2e_pref_switch_bypasses_policies() {
     "Pref switch OFF: should bypass all policies (allow everything)"
   );
 
-  teardown();
+  await teardown();
 });
diff --git a/toolkit/components/ml/tests/xpcshell/test_policy_evaluator.js b/toolkit/components/ml/tests/xpcshell/test_policy_evaluator.js
@@ -12,12 +12,15 @@
  * Focus: Policy matching, deny/allow effects, multiple conditions
  */
 
-const { SecurityOrchestrator } = ChromeUtils.importESModule(
-  "chrome://global/content/ml/security/SecurityOrchestrator.sys.mjs"
-);
+const { SecurityOrchestrator, getSecurityOrchestrator } =
+  ChromeUtils.importESModule(
+    "chrome://global/content/ml/security/SecurityOrchestrator.sys.mjs"
+  );
 
 const PREF_SECURITY_ENABLED = "browser.ml.security.enabled";
 
+const TEST_SESSION_ID = "test-session";
+
 /** @type {SecurityOrchestrator|null} */
 let orchestrator = null;
 
@@ -25,8 +28,9 @@ function setup() {
   Services.prefs.clearUserPref(PREF_SECURITY_ENABLED);
 }
 
-function teardown() {
+async function teardown() {
   Services.prefs.clearUserPref(PREF_SECURITY_ENABLED);
+  await SecurityOrchestrator.resetForTesting();
   orchestrator = null;
 }
 
@@ -41,12 +45,13 @@ function teardown() {
 add_task(async function test_policy_matches_correct_phase() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1");
 
   // tool.execution phase should match our policies
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -68,7 +73,7 @@ add_task(async function test_policy_matches_correct_phase() {
   );
   Assert.equal(decision.policyId, "block-unseen-links");
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -82,12 +87,13 @@ add_task(async function test_policy_matches_correct_phase() {
 add_task(async function test_policy_ignores_unknown_phase() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1");
 
   // Unknown phase should not match any policies
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "unknown.phase",
     action: {
       type: "tool.call",
@@ -108,7 +114,7 @@ add_task(async function test_policy_ignores_unknown_phase() {
     "Unknown phase should not match policies (allow by default)"
   );
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -122,12 +128,13 @@ add_task(async function test_policy_ignores_unknown_phase() {
 add_task(async function test_deny_policy_denies_when_condition_fails() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1").add("https://example.com");
 
   // URL not in ledger = condition fails = deny
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -148,7 +155,7 @@ add_task(async function test_deny_policy_denies_when_condition_fails() {
   Assert.equal(decision.policyId, "block-unseen-links");
   Assert.ok(decision.details, "Should include failure details");
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -163,12 +170,13 @@ add_task(
   async function test_deny_policy_passes_through_when_condition_passes() {
     setup();
 
-    orchestrator = await SecurityOrchestrator.create("test-session");
-    const ledger = orchestrator.getSessionLedger();
+    orchestrator = await getSecurityOrchestrator();
+    orchestrator.registerSession(TEST_SESSION_ID);
+    const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
     ledger.forTab("tab-1").add("https://example.com");
 
     // URL in ledger = condition passes = policy doesn't apply (allow)
-    const decision = await orchestrator.evaluate({
+    const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
       phase: "tool.execution",
       action: {
         type: "tool.call",
@@ -189,7 +197,7 @@ add_task(
       "Should allow when deny policy condition passes (policy doesn't apply)"
     );
 
-    teardown();
+    await teardown();
   }
 );
 
@@ -204,12 +212,13 @@ add_task(
 add_task(async function test_policy_checks_all_urls() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1").add("https://example.com");
   // Not adding evil.com
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -233,7 +242,7 @@ add_task(async function test_policy_checks_all_urls() {
     "Should deny if ANY URL fails condition (all-or-nothing)"
   );
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -247,13 +256,14 @@ add_task(async function test_policy_checks_all_urls() {
 add_task(async function test_policy_allows_when_all_urls_valid() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   const tabLedger = ledger.forTab("tab-1");
   tabLedger.add("https://example.com");
   tabLedger.add("https://mozilla.org");
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -274,7 +284,7 @@ add_task(async function test_policy_allows_when_all_urls_valid() {
     "Should allow when all URLs pass condition"
   );
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -288,12 +298,13 @@ add_task(async function test_policy_allows_when_all_urls_valid() {
 add_task(async function test_policy_applies_to_get_page_content() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1");
 
   // Verify policy applies to get_page_content (the main URL-fetching tool)
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -314,7 +325,7 @@ add_task(async function test_policy_applies_to_get_page_content() {
     "Policy should apply to get_page_content"
   );
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -328,11 +339,12 @@ add_task(async function test_policy_applies_to_get_page_content() {
 add_task(async function test_deny_decision_includes_policy_info() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1");
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -362,5 +374,5 @@ add_task(async function test_deny_decision_includes_policy_info() {
     "Should identify failed condition"
   );
 
-  teardown();
+  await teardown();
 });
diff --git a/toolkit/components/ml/tests/xpcshell/test_security_orchestrator.js b/toolkit/components/ml/tests/xpcshell/test_security_orchestrator.js
@@ -11,14 +11,18 @@
  * - Policy execution (allow/deny with real policies)
  * - Envelope validation (security boundary)
  * - Error handling (fail-closed)
+ * - Session management (register/cleanup)
  */
 
-const { SecurityOrchestrator } = ChromeUtils.importESModule(
-  "chrome://global/content/ml/security/SecurityOrchestrator.sys.mjs"
-);
+const { SecurityOrchestrator, getSecurityOrchestrator } =
+  ChromeUtils.importESModule(
+    "chrome://global/content/ml/security/SecurityOrchestrator.sys.mjs"
+  );
 
 const PREF_SECURITY_ENABLED = "browser.ml.security.enabled";
 
+const TEST_SESSION_ID = "test-session";
+
 /** @type {SecurityOrchestrator|null} */
 let orchestrator = null;
 
@@ -26,8 +30,9 @@ function setup() {
   Services.prefs.clearUserPref(PREF_SECURITY_ENABLED);
 }
 
-function teardown() {
+async function teardown() {
   Services.prefs.clearUserPref(PREF_SECURITY_ENABLED);
+  await SecurityOrchestrator.resetForTesting();
   orchestrator = null;
 }
 
@@ -35,24 +40,25 @@ function teardown() {
  * Test: initialization creates a session with ledger.
  *
  * Reason:
- * SecurityOrchestrator.create() must initialize a functional session
- * with an empty ledger ready for URL seeding. This is the entry point
+ * getSecurityOrchestrator() + registerSession() must initialize a functional
+ * session with an empty ledger ready for URL seeding. This is the entry point
  * for all security layer operations.
  */
 add_task(async function test_initialization_creates_session() {
   setup();
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
 
   Assert.ok(ledger, "Should return session ledger");
   Assert.equal(ledger.tabCount(), 0, "Should start with no tabs");
   Assert.ok(
-    orchestrator.getSessionLedger(),
+    orchestrator.getSessionLedger(TEST_SESSION_ID),
     "Should be able to get session ledger"
   );
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -67,11 +73,12 @@ add_task(async function test_pref_switch_disabled_allows_everything() {
   setup();
   Services.prefs.setBoolPref(PREF_SECURITY_ENABLED, false);
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1"); // Empty ledger
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -92,7 +99,7 @@ add_task(async function test_pref_switch_disabled_allows_everything() {
     "Pref switch OFF: should allow everything (pass-through)"
   );
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -107,11 +114,12 @@ add_task(async function test_pref_switch_enabled_enforces_policies() {
   setup();
   Services.prefs.setBoolPref(PREF_SECURITY_ENABLED, true);
 
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1");
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -129,7 +137,7 @@ add_task(async function test_pref_switch_enabled_enforces_policies() {
   Assert.equal(decision.effect, "deny", "Pref switch ON: should enforce");
   Assert.equal(decision.code, "UNSEEN_LINK", "Should deny unseen links");
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -144,8 +152,9 @@ add_task(async function test_pref_switch_runtime_change() {
   setup();
 
   Services.prefs.setBoolPref(PREF_SECURITY_ENABLED, true);
-  orchestrator = await SecurityOrchestrator.create("test-session");
-  const ledger = orchestrator.getSessionLedger();
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1");
 
   const envelope = {
@@ -164,54 +173,68 @@ add_task(async function test_pref_switch_runtime_change() {
   };
 
   // Should deny when enabled
-  let decision = await orchestrator.evaluate(envelope);
+  let decision = await orchestrator.evaluate(TEST_SESSION_ID, envelope);
   Assert.equal(decision.effect, "deny", "Should deny when enabled");
 
   // Disable at runtime
   Services.prefs.setBoolPref(PREF_SECURITY_ENABLED, false);
 
   // Should allow immediately
-  decision = await orchestrator.evaluate(envelope);
+  decision = await orchestrator.evaluate(TEST_SESSION_ID, envelope);
   Assert.equal(
     decision.effect,
     "allow",
     "Should allow immediately after runtime disable"
   );
 
-  teardown();
+  await teardown();
 });
 
 /**
- * Test: invalid envelope fails closed.
+ * Test: invalid envelope throws error.
  *
  * Reason:
- * Malformed envelopes (missing phase, action, or context) must be
- * denied rather than allowed. Fail-closed behavior ensures that
- * broken or malicious requests don't bypass security checks.
+ * Malformed envelopes (missing phase, action, or context) indicate a
+ * wiring bug. evaluate() should reject immediately so tests catch the
+ * issue instead of receiving a quiet "deny" decision.
  */
-add_task(async function test_invalid_envelope_fails_closed() {
+add_task(async function test_invalid_envelope_throws() {
   setup();
   Services.prefs.setBoolPref(PREF_SECURITY_ENABLED, true);
-  orchestrator = await SecurityOrchestrator.create("test-session");
-
-  const invalidEnvelopes = [
-    null,
-    { action: { type: "test" }, context: {} }, // missing phase
-    { phase: "test", context: {} }, // missing action
-    { phase: "test", action: { type: "test" } }, // missing context
-  ];
-
-  for (const envelope of invalidEnvelopes) {
-    const decision = await orchestrator.evaluate(envelope);
-    Assert.equal(
-      decision.effect,
-      "deny",
-      "Invalid envelope should fail closed (deny)"
-    );
-    Assert.equal(decision.code, "INVALID_REQUEST", "Should have correct code");
-  }
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
+
+  await Assert.rejects(
+    orchestrator.evaluate(TEST_SESSION_ID, null),
+    /Security envelope is null or invalid/,
+    "Null envelope should throw"
+  );
 
-  teardown();
+  await Assert.rejects(
+    orchestrator.evaluate(TEST_SESSION_ID, {
+      action: { type: "test" },
+      context: {},
+    }),
+    /Security envelope missing required fields/,
+    "Missing phase should throw"
+  );
+
+  await Assert.rejects(
+    orchestrator.evaluate(TEST_SESSION_ID, { phase: "test", context: {} }),
+    /Security envelope missing required fields/,
+    "Missing action should throw"
+  );
+
+  await Assert.rejects(
+    orchestrator.evaluate(TEST_SESSION_ID, {
+      phase: "test",
+      action: { type: "test" },
+    }),
+    /Security envelope missing required fields/,
+    "Missing context should throw"
+  );
+
+  await teardown();
 });
 
 /**
@@ -225,12 +248,13 @@ add_task(async function test_invalid_envelope_fails_closed() {
 add_task(async function test_policy_allows_seeded_url() {
   setup();
   Services.prefs.setBoolPref(PREF_SECURITY_ENABLED, true);
-  orchestrator = await SecurityOrchestrator.create("test-session");
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
 
-  const ledger = orchestrator.getSessionLedger();
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1").add("https://example.com");
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -247,7 +271,7 @@ add_task(async function test_policy_allows_seeded_url() {
 
   Assert.equal(decision.effect, "allow", "Should allow seeded URL");
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -261,12 +285,13 @@ add_task(async function test_policy_allows_seeded_url() {
 add_task(async function test_policy_denies_unseen_url() {
   setup();
   Services.prefs.setBoolPref(PREF_SECURITY_ENABLED, true);
-  orchestrator = await SecurityOrchestrator.create("test-session");
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
 
-  const ledger = orchestrator.getSessionLedger();
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1"); // Empty ledger
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -290,7 +315,7 @@ add_task(async function test_policy_denies_unseen_url() {
     "Should identify policy"
   );
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -304,12 +329,13 @@ add_task(async function test_policy_denies_unseen_url() {
 add_task(async function test_policy_denies_if_any_url_unseen() {
   setup();
   Services.prefs.setBoolPref(PREF_SECURITY_ENABLED, true);
-  orchestrator = await SecurityOrchestrator.create("test-session");
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
 
-  const ledger = orchestrator.getSessionLedger();
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1").add("https://example.com");
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -333,7 +359,7 @@ add_task(async function test_policy_denies_if_any_url_unseen() {
     "Should deny if ANY URL unseen (all-or-nothing)"
   );
 
-  teardown();
+  await teardown();
 });
 
 /**
@@ -347,12 +373,13 @@ add_task(async function test_policy_denies_if_any_url_unseen() {
 add_task(async function test_malformed_url_fails_closed() {
   setup();
   Services.prefs.setBoolPref(PREF_SECURITY_ENABLED, true);
-  orchestrator = await SecurityOrchestrator.create("test-session");
+  orchestrator = await getSecurityOrchestrator();
+  orchestrator.registerSession(TEST_SESSION_ID);
 
-  const ledger = orchestrator.getSessionLedger();
+  const ledger = orchestrator.getSessionLedger(TEST_SESSION_ID);
   ledger.forTab("tab-1");
 
-  const decision = await orchestrator.evaluate({
+  const decision = await orchestrator.evaluate(TEST_SESSION_ID, {
     phase: "tool.execution",
     action: {
       type: "tool.call",
@@ -376,5 +403,172 @@ add_task(async function test_malformed_url_fails_closed() {
   // caught as specifically malformed
   Assert.equal(decision.code, "UNSEEN_LINK", "Should have UNSEEN_LINK code");
 
-  teardown();
+  await teardown();
+});
+
+/**
+ * Test: unknown session throws error.
+ *
+ * Reason:
+ * Requests for unregistered sessions indicate a wiring bug.
+ * evaluate() should reject immediately so tests catch the issue.
+ */
+add_task(async function test_unknown_session_throws() {
+  setup();
+  Services.prefs.setBoolPref(PREF_SECURITY_ENABLED, true);
+  orchestrator = await getSecurityOrchestrator();
+  // Note: NOT calling registerSession()
+
+  await Assert.rejects(
+    orchestrator.evaluate("unknown-session", {
+      phase: "tool.execution",
+      action: {
+        type: "tool.call",
+        tool: "get_page_content",
+        urls: ["https://example.com"],
+        tabId: "tab-1",
+      },
+      context: {
+        currentTabId: "tab-1",
+        mentionedTabIds: [],
+        requestId: "test-123",
+      },
+    }),
+    /Session unknown-session is not registered/,
+    "Unknown session should throw"
+  );
+
+  await teardown();
+});
+
+/**
+ * Test: registerSession is idempotent.
+ *
+ * Reason:
+ * Calling registerSession multiple times with the same sessionId should
+ * not create duplicate ledgers or throw errors. This simplifies caller
+ * logic and prevents accidental state corruption.
+ */
+add_task(async function test_register_session_idempotent() {
+  setup();
+  orchestrator = await getSecurityOrchestrator();
+
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger1 = orchestrator.getSessionLedger(TEST_SESSION_ID);
+  ledger1.forTab("tab-1").add("https://example.com");
+
+  // Register again - should not reset the ledger
+  orchestrator.registerSession(TEST_SESSION_ID);
+  const ledger2 = orchestrator.getSessionLedger(TEST_SESSION_ID);
+
+  Assert.equal(ledger1, ledger2, "Should return same ledger instance");
+  Assert.ok(
+    ledger2.forTab("tab-1").has("https://example.com"),
+    "Ledger data should be preserved"
+  );
+
+  await teardown();
+});
+
+/**
+ * Test: cleanupSession removes ledger.
+ *
+ * Reason:
+ * When an AI Window closes, its session ledger must be removed to
+ * prevent memory leaks and ensure clean state.
+ */
+add_task(async function test_cleanup_session_removes_ledger() {
+  setup();
+  orchestrator = await getSecurityOrchestrator();
+
+  orchestrator.registerSession(TEST_SESSION_ID);
+  Assert.ok(
+    orchestrator.getSessionLedger(TEST_SESSION_ID),
+    "Ledger should exist"
+  );
+
+  orchestrator.cleanupSession(TEST_SESSION_ID);
+  Assert.equal(
+    orchestrator.getSessionLedger(TEST_SESSION_ID),
+    undefined,
+    "Ledger should be removed after cleanup"
+  );
+
+  await teardown();
+});
+
+/**
+ * Test: cleanupSession is idempotent.
+ *
+ * Reason:
+ * Calling cleanupSession on a non-existent session should not throw.
+ * This simplifies caller logic and handles edge cases gracefully.
+ */
+add_task(async function test_cleanup_session_idempotent() {
+  setup();
+  orchestrator = await getSecurityOrchestrator();
+
+  // Should not throw even for non-existent session
+  orchestrator.cleanupSession("non-existent-session");
+  orchestrator.cleanupSession("non-existent-session");
+
+  // Should also not throw when the session was already cleaned up
+  orchestrator.registerSession(TEST_SESSION_ID);
+  orchestrator.cleanupSession(TEST_SESSION_ID);
+  orchestrator.cleanupSession(TEST_SESSION_ID);
+
+  await teardown();
+});
+
+/**
+ * Test: registerSession rejects invalid sessionId.
+ *
+ * Reason:
+ * Session IDs must be non-empty strings. Invalid IDs should be
+ * rejected to prevent confusing state or security issues.
+ */
+add_task(async function test_register_session_rejects_invalid_id() {
+  setup();
+  orchestrator = await getSecurityOrchestrator();
+
+  const invalidIds = [null, undefined, "", 123, {}, []];
+
+  for (const invalidId of invalidIds) {
+    Assert.throws(
+      () => orchestrator.registerSession(invalidId),
+      /registerSession requires a non-empty string sessionId/,
+      `Should reject invalid sessionId: ${JSON.stringify(invalidId)}`
+    );
+  }
+
+  await teardown();
+});
+
+/**
+ * Test: getStats returns session count.
+ *
+ * Reason:
+ * The stats should reflect the current number of registered sessions
+ * for debugging and monitoring purposes.
+ */
+add_task(async function test_get_stats_returns_session_count() {
+  setup();
+  orchestrator = await getSecurityOrchestrator();
+
+  let stats = orchestrator.getStats();
+  Assert.equal(stats.sessionCount, 0, "Should start with 0 sessions");
+
+  orchestrator.registerSession("session-1");
+  orchestrator.registerSession("session-2");
+
+  stats = orchestrator.getStats();
+  Assert.equal(stats.sessionCount, 2, "Should have 2 sessions");
+  Assert.ok(stats.sessionStats["session-1"], "Should have stats for session-1");
+  Assert.ok(stats.sessionStats["session-2"], "Should have stats for session-2");
+
+  orchestrator.cleanupSession("session-1");
+  stats = orchestrator.getStats();
+  Assert.equal(stats.sessionCount, 1, "Should have 1 session after cleanup");
+
+  await teardown();
 });

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE

M	toolkit/components/ml/security/SecurityOrchestrator.sys.mjs	\|	254	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
M	toolkit/components/ml/tests/xpcshell/test_condition_evaluator.js	\|	75	+++++++++++++++++++++++++++++++++++++++++++--------------------------------
M	toolkit/components/ml/tests/xpcshell/test_json_policy_system.js	\|	120	+++++++++++++++++++++++++++++++++++++++++++++----------------------------------
M	toolkit/components/ml/tests/xpcshell/test_policy_evaluator.js	\|	84	+++++++++++++++++++++++++++++++++++++++++++++----------------------------------
M	toolkit/components/ml/tests/xpcshell/test_security_orchestrator.js	\|	320	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------