tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit a99dfbcddf35ceb514d059180e6ce3b180659c90
parent 5024760ece3972b42a128ad032b57e67e8cb104b
Author: Suhaib Mujahid <suhaibmujahid@gmail.com>
Date:   Thu, 13 Nov 2025 14:43:47 +0000

Bug 1998977 - Add `--auto` test selection using BugBug recommendations. r=marco,taskgraph-reviewers,bhearsum

Introduces a new `--auto` flag to the `mach test` command, enabling automatic test selection based on local changes using BugBug's patch schedules API. Adds helpers to mozversioncontrol's git.py for generating format-patch style patches from local commits and uncommitted changes, adds a patch_schedules() query to bugbug.py (which computes the patch hash and supports patch-based scheduling queries), and updates mach_commands.py to integrate BugBug recommendations.

Differential Revision: https://phabricator.services.mozilla.com/D272116

Diffstat:
M python/mozversioncontrol/mozversioncontrol/factory.py | 26 +++++++++++++++++++++++++-
M python/mozversioncontrol/mozversioncontrol/repo/git.py | 37 +++++++++++++++++++++++++++++++++++++
M taskcluster/gecko_taskgraph/util/bugbug.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M testing/mach_commands.py | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 193 insertions(+), 2 deletions(-)

diff --git a/python/mozversioncontrol/mozversioncontrol/factory.py b/python/mozversioncontrol/mozversioncontrol/factory.py @@ -7,7 +7,7 @@ import re import subprocess import sys from pathlib import Path -from typing import Optional, Union +from typing import Literal, Optional, Union, overload from packaging.version import Version @@ -36,6 +36,30 @@ VCS_CLASSES: dict[str, type] = { } +@overload +def get_specific_repository_object( + data: str, output_format: Literal["git"] +) -> GitRepository: ... + + +@overload +def get_specific_repository_object( + data: str, output_format: Literal["hg"] +) -> HgRepository: ... + + +@overload +def get_specific_repository_object( + data: str, output_format: Literal["jj"] +) -> JujutsuRepository: ... + + +@overload +def get_specific_repository_object( + data: str, output_format: Literal["src"] +) -> SrcRepository: ... + + def get_specific_repository_object(path: Optional[Union[str, Path]], vcs: str): """Return a repository object for the given VCS and path.""" resolved_path = Path(path).resolve() diff --git a/python/mozversioncontrol/mozversioncontrol/repo/git.py b/python/mozversioncontrol/mozversioncontrol/repo/git.py @@ -187,6 +187,12 @@ class GitRepository(Repository): return None return email.strip() + def get_user_name(self): + name = self._run("config", "user.name", return_codes=[0, 1]) + if not name: + return None + return name.strip() + def get_changed_files(self, diff_filter="ADM", mode="unstaged", rev=None): assert all(f.lower() in self._valid_diff_filter for f in diff_filter) @@ -684,3 +690,34 @@ class GitRepository(Repository): print(f"Copying {watchman_sample} to {watchman_config}") subprocess.check_call(copy_cmd, cwd=str(self.path)) self.set_config_key_value(key="core.fsmonitor", value=str(watchman_config)) + + def get_patches_after_ref(self, base_ref) -> str: + """ + Retrieve git format-patch style patches of all commits that occurred + after `base_ref`. 
+ """ + return self._run("format-patch", f"{base_ref}..HEAD", "--stdout") + + def get_patch_for_uncommitted_changes( + self, message: str = "[PATCH] Uncommitted changes", date: datetime = None + ) -> str: + """ + Generate a git format-patch style patch of all uncommitted changes in + the working directory. + """ + if not date: + date = datetime.now() + + name = self.get_user_name() + email = self.get_user_email() + formatted_date = date.strftime("%a %b %d %H:%M:%S %Y %z") + + patch = [ + "From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001", + f"From: {name} <{email}>", + f"Date: {formatted_date}", + f"Subject: {message}" "\n---\n", + self._run("diff", "--no-color", "HEAD"), + ] + + return "\n".join(patch) diff --git a/taskcluster/gecko_taskgraph/util/bugbug.py b/taskcluster/gecko_taskgraph/util/bugbug.py @@ -171,3 +171,77 @@ def push_schedules(branch, rev): } return data + + +@memoize +def patch_schedules(base_rev, patch_content, mode="quick"): + """Query BugBug API with a patch to get test recommendations. + + This is used by `./mach test --auto` to get test recommendations for local changes. + + Args: + base_rev (str): The base revision hash. + patch_content (str): The patch content with commit metadata. + mode (str): The mode of test selection, which determines the confidence + threshold. One of 'extensive', 'moderate', or 'quick'. + Returns: + dict: A dictionary with containing test recommendations filtered by + confidence threshold. + + Raises: + BugbugTimeoutException: If the API times out. + """ + + import hashlib + import re + + # This ensures consistent hashing across multiple runs with identical + # changes by stripping the date before hashing. + filtered_content = re.sub(r"^Date: .*$", "", patch_content, flags=re.MULTILINE) + patch_hash = hashlib.md5(filtered_content.encode("utf-8")).hexdigest() + + url = BUGBUG_BASE_URL + f"/patch/{base_rev}/{patch_hash}/schedules" + # FIXME: Remove fallback once BugBug is fully migrated. 
+ url = url.replace(BUGBUG_BASE_URL, BUGBUG_BASE_FALLBACK_URL) + + session = get_session() + + r = session.post( + url, + data=patch_content.encode("utf-8"), + headers={"Content-Type": "text/plain"}, + ) + r.raise_for_status() + + timeout = RETRY_TIMEOUT + attempts = timeout / RETRY_INTERVAL + i = 0 + while i < attempts: + if r.status_code != 202: + break + + time.sleep(RETRY_INTERVAL) + r = session.get(url) + r.raise_for_status() + i += 1 + + data = r.json() + if r.status_code == 202: + raise BugbugTimeoutException(f"Timed out waiting for result from '{url}'") + + if mode == "extensive": + confidence_threshold = CT_LOW + elif mode == "moderate": + confidence_threshold = CT_MEDIUM + elif mode == "quick": + confidence_threshold = CT_HIGH + else: + raise ValueError( + f"Invalid mode: '{mode}'; expected one of 'extensive', 'moderate', 'quick'" + ) + + return { + translate_group(k): v + for k, v in data["groups"].items() + if v >= confidence_threshold + } diff --git a/testing/mach_commands.py b/testing/mach_commands.py @@ -54,7 +54,7 @@ def get_test_parser(): parser.add_argument( "what", default=None, - nargs="+", + nargs="*", help=TEST_HELP.format(", ".join(sorted(TEST_SUITES))), ) parser.add_argument( @@ -72,6 +72,18 @@ def get_test_parser(): nargs="?", help="Specify a debugger to use.", ) + parser.add_argument( + "--auto", + nargs="?", + const="quick", + default=False, + choices=["extensive", "moderate", "quick"], + metavar="LEVEL", + help="Automatically select tests based on local changes using BugBug. " + "Optional confidence level: 'extensive' (more tests), 'moderate' or " + "'quick' (fewer tests with highest confidence to be related). 
" + "Default: quick", + ) add_logging_group(parser) return parser @@ -376,6 +388,50 @@ def test(command_context, what, extra_args, **log_args): from mozlog.handlers import ResourceHandler, StreamHandler from moztest.resolve import TEST_SUITES, TestResolver, get_suite_definition + if not log_args.get("auto") and not what: + print("Error: You must specify test paths or use --auto flag") + return 1 + + if log_args.get("auto"): + from itertools import chain + + from gecko_taskgraph.util.bugbug import patch_schedules + from mozversioncontrol.factory import get_specific_repository_object + + if what: + print( + "Note: when using --auto, any test paths specified will be combined with BugBug's recommendations." + ) + + selection_mode = log_args.get("auto") + + repo = get_specific_repository_object(".", "git") + base_commit = repo.base_ref_as_commit() + patch = "\n".join( + [ + repo.get_patches_after_ref(base_commit), + repo.get_patch_for_uncommitted_changes(), + ] + ) + + print( + f"Querying BugBug for test recommendations... (based on changes after {base_commit[:8]})" + ) + schedules = patch_schedules(base_commit, patch, selection_mode) + + if not schedules: + print( + "BugBug did not recommend any tests for your changes. Consider specifying tests by path or suite name." + ) + return 0 + + test_paths = sorted(schedules.keys()) + print(f"BugBug recommended {len(test_paths)} test group(s):") + for path in test_paths: + print(f" {path} (confidence: {schedules[path]:.2f})") + + what = set(chain(what, test_paths)) + resolver = command_context._spawn(TestResolver) run_suites, run_tests = resolver.resolve_metadata(what)