commit a99dfbcddf35ceb514d059180e6ce3b180659c90
parent 5024760ece3972b42a128ad032b57e67e8cb104b
Author: Suhaib Mujahid <suhaibmujahid@gmail.com>
Date: Thu, 13 Nov 2025 14:43:47 +0000
Bug 1998977 - Add `--auto` test selection using BugBug recommendations. r=marco,taskgraph-reviewers,bhearsum
Introduces a new `--auto` flag to the test command, enabling automatic test selection based on local changes using BugBug's patch schedules API. Adds helpers to mozversioncontrol's GitRepository for generating format-patch style patches of committed and uncommitted changes, adds typed overloads for get_specific_repository_object in factory.py, and updates mach_commands.py to integrate BugBug recommendations. Also updates bugbug.py to support patch-based scheduling queries, including computing the patch hash.
Differential Revision: https://phabricator.services.mozilla.com/D272116
Diffstat:
4 files changed, 193 insertions(+), 2 deletions(-)
diff --git a/python/mozversioncontrol/mozversioncontrol/factory.py b/python/mozversioncontrol/mozversioncontrol/factory.py
@@ -7,7 +7,7 @@ import re
import subprocess
import sys
from pathlib import Path
-from typing import Optional, Union
+from typing import Literal, Optional, Union, overload
from packaging.version import Version
@@ -36,6 +36,30 @@ VCS_CLASSES: dict[str, type] = {
}
+# Typing-only overloads: a literal `vcs` value tells type checkers which
+# repository class comes back. Parameter names and types mirror the
+# implementation's `(path, vcs)` signature so the overloads stay consistent
+# with it (positional and keyword calls alike).
+@overload
+def get_specific_repository_object(
+    path: Optional[Union[str, Path]], vcs: Literal["git"]
+) -> GitRepository: ...
+
+
+@overload
+def get_specific_repository_object(
+    path: Optional[Union[str, Path]], vcs: Literal["hg"]
+) -> HgRepository: ...
+
+
+@overload
+def get_specific_repository_object(
+    path: Optional[Union[str, Path]], vcs: Literal["jj"]
+) -> JujutsuRepository: ...
+
+
+@overload
+def get_specific_repository_object(
+    path: Optional[Union[str, Path]], vcs: Literal["src"]
+) -> SrcRepository: ...
+
+
def get_specific_repository_object(path: Optional[Union[str, Path]], vcs: str):
"""Return a repository object for the given VCS and path."""
resolved_path = Path(path).resolve()
diff --git a/python/mozversioncontrol/mozversioncontrol/repo/git.py b/python/mozversioncontrol/mozversioncontrol/repo/git.py
@@ -187,6 +187,12 @@ class GitRepository(Repository):
return None
return email.strip()
+    def get_user_name(self):
+        """Return the configured `user.name`, or None when nothing is set.
+
+        The lookup goes through `self._run("config", "user.name")` with
+        return codes 0 and 1 both accepted, so an empty result is reported
+        as None instead of raising (presumably 1 is the "key not set" exit
+        code; confirm against `_run`).
+        """
+        configured = self._run("config", "user.name", return_codes=[0, 1])
+        return configured.strip() if configured else None
+
def get_changed_files(self, diff_filter="ADM", mode="unstaged", rev=None):
assert all(f.lower() in self._valid_diff_filter for f in diff_filter)
@@ -684,3 +690,34 @@ class GitRepository(Repository):
print(f"Copying {watchman_sample} to {watchman_config}")
subprocess.check_call(copy_cmd, cwd=str(self.path))
self.set_config_key_value(key="core.fsmonitor", value=str(watchman_config))
+
+    def get_patches_after_ref(self, base_ref) -> str:
+        """
+        Return, as one string, the format-patch output for the revision
+        range `<base_ref>..HEAD`, i.e. the commits made after `base_ref`.
+        """
+        revision_range = f"{base_ref}..HEAD"
+        # `--stdout` asks for the patches as command output rather than files.
+        return self._run("format-patch", revision_range, "--stdout")
+
+    def get_patch_for_uncommitted_changes(
+        self, message: str = "[PATCH] Uncommitted changes", date: datetime = None
+    ) -> str:
+        """
+        Generate a git format-patch style patch of all uncommitted changes in
+        the working directory.
+
+        The body is the output of `diff --no-color HEAD`, preceded by
+        synthetic `From`, `Date` and `Subject` headers.
+
+        `message` is used as the subject line. `date` is the timestamp for
+        the `Date:` header and defaults to the current time; a naive value is
+        interpreted as local time so the rendered UTC offset is never empty.
+        When no git user name or email is configured, placeholder values are
+        used instead of the literal text "None".
+        """
+        if date is None:
+            date = datetime.now()
+        if date.utcoffset() is None:
+            # `%z` renders as an empty string for naive datetimes, which would
+            # leave the header without an offset; attach the local timezone.
+            date = date.astimezone()
+
+        # The identity getters return None when git has no value configured.
+        name = self.get_user_name() or "Unknown"
+        email = self.get_user_email() or "unknown@localhost"
+        formatted_date = date.strftime("%a %b %d %H:%M:%S %Y %z")
+
+        patch = [
+            # Separator line with an all-zero placeholder hash: these changes
+            # do not belong to a real commit.
+            "From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001",
+            f"From: {name} <{email}>",
+            f"Date: {formatted_date}",
+            f"Subject: {message}",
+            "---",
+            # Blank line between the `---` marker and the diff body.
+            "",
+            self._run("diff", "--no-color", "HEAD"),
+        ]
+
+        return "\n".join(patch)
diff --git a/taskcluster/gecko_taskgraph/util/bugbug.py b/taskcluster/gecko_taskgraph/util/bugbug.py
@@ -171,3 +171,77 @@ def push_schedules(branch, rev):
}
return data
+
+
+@memoize
+def patch_schedules(base_rev, patch_content, mode="quick"):
+    """Query BugBug API with a patch to get test recommendations.
+
+    This is used by `./mach test --auto` to get test recommendations for local changes.
+
+    The patch is POSTed to a URL keyed by the base revision and a hash of the
+    patch; while the service answers 202, the same URL is polled with GET
+    every RETRY_INTERVAL seconds until RETRY_TIMEOUT is exhausted.
+
+    Args:
+        base_rev (str): The base revision hash.
+        patch_content (str): The patch content with commit metadata.
+        mode (str): The mode of test selection, which determines the confidence
+            threshold. One of 'extensive', 'moderate', or 'quick'.
+    Returns:
+        dict: A dictionary mapping translated test group names to their
+            confidence, keeping only groups at or above the threshold
+            selected by `mode`.
+
+    Raises:
+        ValueError: If `mode` is not a supported value. This is checked
+            before any request is sent.
+        BugbugTimeoutException: If the API times out.
+
+    Errors raised by `raise_for_status()` on a failed request propagate.
+    """
+
+    import hashlib
+    import re
+
+    # Validate first so a mistyped mode fails immediately rather than after a
+    # network round trip and a full polling timeout.
+    if mode == "extensive":
+        confidence_threshold = CT_LOW
+    elif mode == "moderate":
+        confidence_threshold = CT_MEDIUM
+    elif mode == "quick":
+        confidence_threshold = CT_HIGH
+    else:
+        raise ValueError(
+            f"Invalid mode: '{mode}'; expected one of 'extensive', 'moderate', 'quick'"
+        )
+
+    # This ensures consistent hashing across multiple runs with identical
+    # changes by stripping the date before hashing.
+    filtered_content = re.sub(r"^Date: .*$", "", patch_content, flags=re.MULTILINE)
+    patch_hash = hashlib.md5(filtered_content.encode("utf-8")).hexdigest()
+
+    url = BUGBUG_BASE_URL + f"/patch/{base_rev}/{patch_hash}/schedules"
+    # FIXME: Remove fallback once BugBug is fully migrated.
+    url = url.replace(BUGBUG_BASE_URL, BUGBUG_BASE_FALLBACK_URL)
+
+    session = get_session()
+
+    r = session.post(
+        url,
+        data=patch_content.encode("utf-8"),
+        headers={"Content-Type": "text/plain"},
+    )
+    r.raise_for_status()
+
+    # A 202 response means the result is not ready yet: poll until a different
+    # status arrives or the attempt budget runs out.
+    attempts = RETRY_TIMEOUT / RETRY_INTERVAL
+    i = 0
+    while r.status_code == 202 and i < attempts:
+        time.sleep(RETRY_INTERVAL)
+        r = session.get(url)
+        r.raise_for_status()
+        i += 1
+
+    if r.status_code == 202:
+        raise BugbugTimeoutException(f"Timed out waiting for result from '{url}'")
+
+    # Parse only once a final response is in hand, so an unparsable 202 body
+    # cannot mask the timeout error above.
+    data = r.json()
+
+    return {
+        translate_group(k): v
+        for k, v in data["groups"].items()
+        if v >= confidence_threshold
+    }
diff --git a/testing/mach_commands.py b/testing/mach_commands.py
@@ -54,7 +54,7 @@ def get_test_parser():
parser.add_argument(
"what",
default=None,
- nargs="+",
+ nargs="*",
help=TEST_HELP.format(", ".join(sorted(TEST_SUITES))),
)
parser.add_argument(
@@ -72,6 +72,18 @@ def get_test_parser():
nargs="?",
help="Specify a debugger to use.",
)
+ parser.add_argument(
+ "--auto",
+ nargs="?",
+ const="quick",
+ default=False,
+ choices=["extensive", "moderate", "quick"],
+ metavar="LEVEL",
+ help="Automatically select tests based on local changes using BugBug. "
+ "Optional confidence level: 'extensive' (more tests), 'moderate' or "
+ "'quick' (fewer tests with highest confidence to be related). "
+ "Default: quick",
+ )
add_logging_group(parser)
return parser
@@ -376,6 +388,50 @@ def test(command_context, what, extra_args, **log_args):
from mozlog.handlers import ResourceHandler, StreamHandler
from moztest.resolve import TEST_SUITES, TestResolver, get_suite_definition
+ if not log_args.get("auto") and not what:
+ print("Error: You must specify test paths or use --auto flag")
+ return 1
+
+ if log_args.get("auto"):
+ from itertools import chain
+
+ from gecko_taskgraph.util.bugbug import patch_schedules
+ from mozversioncontrol.factory import get_specific_repository_object
+
+ if what:
+ print(
+ "Note: when using --auto, any test paths specified will be combined with BugBug's recommendations."
+ )
+
+ selection_mode = log_args.get("auto")
+
+ repo = get_specific_repository_object(".", "git")
+ base_commit = repo.base_ref_as_commit()
+ patch = "\n".join(
+ [
+ repo.get_patches_after_ref(base_commit),
+ repo.get_patch_for_uncommitted_changes(),
+ ]
+ )
+
+ print(
+ f"Querying BugBug for test recommendations... (based on changes after {base_commit[:8]})"
+ )
+ schedules = patch_schedules(base_commit, patch, selection_mode)
+
+ if not schedules:
+ print(
+ "BugBug did not recommend any tests for your changes. Consider specifying tests by path or suite name."
+ )
+ return 0
+
+ test_paths = sorted(schedules.keys())
+ print(f"BugBug recommended {len(test_paths)} test group(s):")
+ for path in test_paths:
+ print(f" {path} (confidence: {schedules[path]:.2f})")
+
+ what = set(chain(what, test_paths))
+
resolver = command_context._spawn(TestResolver)
run_suites, run_tests = resolver.resolve_metadata(what)