[ tor-browser ].git.dasho

commit a99dfbcddf35ceb514d059180e6ce3b180659c90
parent 5024760ece3972b42a128ad032b57e67e8cb104b
Author: Suhaib Mujahid <suhaibmujahid@gmail.com>
Date:   Thu, 13 Nov 2025 14:43:47 +0000

Bug 1998977 - Add `--auto` test selection using BugBug recommendations. r=marco,taskgraph-reviewers,bhearsum

Introduces a new `--auto` flag to the test command, enabling automatic test selection based on local changes using BugBug's patch_schedules API. Adds GitRepository helpers in mozversioncontrol for generating patches from local commits and uncommitted changes, updates bugbug.py to compute patch hashes and support patch-based scheduling queries, and updates mach_commands.py to integrate BugBug recommendations.

Differential Revision: https://phabricator.services.mozilla.com/D272116

Diffstat:
M python/mozversioncontrol/mozversioncontrol/factory.py  | 26 +++++++++++++++++++++++++-
M python/mozversioncontrol/mozversioncontrol/repo/git.py  | 37 +++++++++++++++++++++++++++++++++++++
M taskcluster/gecko_taskgraph/util/bugbug.py  | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M testing/mach_commands.py  | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-

4 files changed, 193 insertions(+), 2 deletions(-)
diff --git a/python/mozversioncontrol/mozversioncontrol/factory.py b/python/mozversioncontrol/mozversioncontrol/factory.py
@@ -7,7 +7,7 @@ import re
 import subprocess
 import sys
 from pathlib import Path
-from typing import Optional, Union
+from typing import Literal, Optional, Union, overload
 
 from packaging.version import Version
 
@@ -36,6 +36,30 @@ VCS_CLASSES: dict[str, type] = {
 }
 
 
+@overload
+def get_specific_repository_object(
+    data: str, output_format: Literal["git"]
+) -> GitRepository: ...
+
+
+@overload
+def get_specific_repository_object(
+    data: str, output_format: Literal["hg"]
+) -> HgRepository: ...
+
+
+@overload
+def get_specific_repository_object(
+    data: str, output_format: Literal["jj"]
+) -> JujutsuRepository: ...
+
+
+@overload
+def get_specific_repository_object(
+    data: str, output_format: Literal["src"]
+) -> SrcRepository: ...
+
+
 def get_specific_repository_object(path: Optional[Union[str, Path]], vcs: str):
     """Return a repository object for the given VCS and path."""
     resolved_path = Path(path).resolve()
diff --git a/python/mozversioncontrol/mozversioncontrol/repo/git.py b/python/mozversioncontrol/mozversioncontrol/repo/git.py
@@ -187,6 +187,12 @@ class GitRepository(Repository):
             return None
         return email.strip()
 
+    def get_user_name(self):
+        name = self._run("config", "user.name", return_codes=[0, 1])
+        if not name:
+            return None
+        return name.strip()
+
     def get_changed_files(self, diff_filter="ADM", mode="unstaged", rev=None):
         assert all(f.lower() in self._valid_diff_filter for f in diff_filter)
 
@@ -684,3 +690,34 @@ class GitRepository(Repository):
             print(f"Copying {watchman_sample} to {watchman_config}")
             subprocess.check_call(copy_cmd, cwd=str(self.path))
         self.set_config_key_value(key="core.fsmonitor", value=str(watchman_config))
+
+    def get_patches_after_ref(self, base_ref) -> str:
+        """
+        Retrieve git format-patch style patches of all commits that occurred
+        after `base_ref`.
+        """
+        return self._run("format-patch", f"{base_ref}..HEAD", "--stdout")
+
+    def get_patch_for_uncommitted_changes(
+        self, message: str = "[PATCH] Uncommitted changes", date: datetime = None
+    ) -> str:
+        """
+        Generate a git format-patch style patch of all uncommitted changes in
+        the working directory.
+        """
+        if not date:
+            date = datetime.now()
+
+        name = self.get_user_name()
+        email = self.get_user_email()
+        formatted_date = date.strftime("%a %b %d %H:%M:%S %Y %z")
+
+        patch = [
+            "From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001",
+            f"From: {name} <{email}>",
+            f"Date: {formatted_date}",
+            f"Subject: {message}" "\n---\n",
+            self._run("diff", "--no-color", "HEAD"),
+        ]
+
+        return "\n".join(patch)
diff --git a/taskcluster/gecko_taskgraph/util/bugbug.py b/taskcluster/gecko_taskgraph/util/bugbug.py
@@ -171,3 +171,77 @@ def push_schedules(branch, rev):
         }
 
     return data
+
+
+@memoize
+def patch_schedules(base_rev, patch_content, mode="quick"):
+    """Query BugBug API with a patch to get test recommendations.
+
+    This is used by `./mach test --auto` to get test recommendations for local changes.
+
+    Args:
+        base_rev (str): The base revision hash.
+        patch_content (str): The patch content with commit metadata.
+        mode (str): The mode of test selection, which determines the confidence
+            threshold. One of 'extensive', 'moderate', or 'quick'.
+    Returns:
+        dict: A dictionary containing test recommendations filtered by
+            confidence threshold.
+
+    Raises:
+        BugbugTimeoutException: If the API times out.
+    """
+
+    import hashlib
+    import re
+
+    # This ensures consistent hashing across multiple runs with identical
+    # changes by stripping the date before hashing.
+    filtered_content = re.sub(r"^Date: .*$", "", patch_content, flags=re.MULTILINE)
+    patch_hash = hashlib.md5(filtered_content.encode("utf-8")).hexdigest()
+
+    url = BUGBUG_BASE_URL + f"/patch/{base_rev}/{patch_hash}/schedules"
+    # FIXME: Remove fallback once BugBug is fully migrated.
+    url = url.replace(BUGBUG_BASE_URL, BUGBUG_BASE_FALLBACK_URL)
+
+    session = get_session()
+
+    r = session.post(
+        url,
+        data=patch_content.encode("utf-8"),
+        headers={"Content-Type": "text/plain"},
+    )
+    r.raise_for_status()
+
+    timeout = RETRY_TIMEOUT
+    attempts = timeout / RETRY_INTERVAL
+    i = 0
+    while i < attempts:
+        if r.status_code != 202:
+            break
+
+        time.sleep(RETRY_INTERVAL)
+        r = session.get(url)
+        r.raise_for_status()
+        i += 1
+
+    data = r.json()
+    if r.status_code == 202:
+        raise BugbugTimeoutException(f"Timed out waiting for result from '{url}'")
+
+    if mode == "extensive":
+        confidence_threshold = CT_LOW
+    elif mode == "moderate":
+        confidence_threshold = CT_MEDIUM
+    elif mode == "quick":
+        confidence_threshold = CT_HIGH
+    else:
+        raise ValueError(
+            f"Invalid mode: '{mode}'; expected one of 'extensive', 'moderate', 'quick'"
+        )
+
+    return {
+        translate_group(k): v
+        for k, v in data["groups"].items()
+        if v >= confidence_threshold
+    }
diff --git a/testing/mach_commands.py b/testing/mach_commands.py
@@ -54,7 +54,7 @@ def get_test_parser():
     parser.add_argument(
         "what",
         default=None,
-        nargs="+",
+        nargs="*",
         help=TEST_HELP.format(", ".join(sorted(TEST_SUITES))),
     )
     parser.add_argument(
@@ -72,6 +72,18 @@ def get_test_parser():
         nargs="?",
         help="Specify a debugger to use.",
     )
+    parser.add_argument(
+        "--auto",
+        nargs="?",
+        const="quick",
+        default=False,
+        choices=["extensive", "moderate", "quick"],
+        metavar="LEVEL",
+        help="Automatically select tests based on local changes using BugBug. "
+        "Optional confidence level: 'extensive' (more tests), 'moderate' or "
+        "'quick' (fewer tests with highest confidence to be related). "
+        "Default: quick",
+    )
     add_logging_group(parser)
     return parser
 
@@ -376,6 +388,50 @@ def test(command_context, what, extra_args, **log_args):
     from mozlog.handlers import ResourceHandler, StreamHandler
     from moztest.resolve import TEST_SUITES, TestResolver, get_suite_definition
 
+    if not log_args.get("auto") and not what:
+        print("Error: You must specify test paths or use --auto flag")
+        return 1
+
+    if log_args.get("auto"):
+        from itertools import chain
+
+        from gecko_taskgraph.util.bugbug import patch_schedules
+        from mozversioncontrol.factory import get_specific_repository_object
+
+        if what:
+            print(
+                "Note: when using --auto, any test paths specified will be combined with BugBug's recommendations."
+            )
+
+        selection_mode = log_args.get("auto")
+
+        repo = get_specific_repository_object(".", "git")
+        base_commit = repo.base_ref_as_commit()
+        patch = "\n".join(
+            [
+                repo.get_patches_after_ref(base_commit),
+                repo.get_patch_for_uncommitted_changes(),
+            ]
+        )
+
+        print(
+            f"Querying BugBug for test recommendations... (based on changes after {base_commit[:8]})"
+        )
+        schedules = patch_schedules(base_commit, patch, selection_mode)
+
+        if not schedules:
+            print(
+                "BugBug did not recommend any tests for your changes. Consider specifying tests by path or suite name."
+            )
+            return 0
+
+        test_paths = sorted(schedules.keys())
+        print(f"BugBug recommended {len(test_paths)} test group(s):")
+        for path in test_paths:
+            print(f"  {path} (confidence: {schedules[path]:.2f})")
+
+        what = set(chain(what, test_paths))
+
     resolver = command_context._spawn(TestResolver)
     run_suites, run_tests = resolver.resolve_metadata(what)

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log | Files | Refs | README | LICENSE

M	python/mozversioncontrol/mozversioncontrol/factory.py	|	26	+++++++++++++++++++++++++-
M	python/mozversioncontrol/mozversioncontrol/repo/git.py	|	37	+++++++++++++++++++++++++++++++++++++
M	taskcluster/gecko_taskgraph/util/bugbug.py	|	74	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	testing/mach_commands.py	|	58	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++-