commit 934a439e0e1655d0ee64b28ce6cdfea7db3dd35b
parent 4b85427200b8777c3b5aa49f9804409bc9910d5a
Author: serge-sans-paille <sguelton@mozilla.com>
Date: Tue, 16 Dec 2025 13:47:13 +0000
Bug 2002796 - Prefetch computation of gecko_taskgraph.files_changed.get_locally_changed_files r=jmaher,taskgraph-reviewers,jcristau
When get_locally_changed_files invokes git, it calls a subcommand that
takes ~700ms. This is quite slow and blocks the startup of several
taskgraph-related tasks.
We usually ask for the files changed in the main repo, so it may be worth
prefetching that value in a separate thread. The GIL is not an issue here, as
we invoke a subprocess.
Differential Revision: https://phabricator.services.mozilla.com/D274297
Diffstat:
2 files changed, 59 insertions(+), 3 deletions(-)
diff --git a/taskcluster/gecko_taskgraph/files_changed.py b/taskcluster/gecko_taskgraph/files_changed.py
@@ -12,10 +12,61 @@ from mozbuild.util import memoize
from mozversioncontrol import InvalidRepoPath, get_repository_object
-@memoize
-def get_locally_changed_files(repo):
+def _get_locally_changed_files(repo):
try:
vcs = get_repository_object(repo)
- return set(vcs.get_outgoing_files("AM"))
+ s = set(vcs.get_outgoing_files("AM"))
+ return s
except (InvalidRepoPath, CalledProcessError):
return set()
+
+
+class PreloadedGetLocallyChangedFiles:
+ """
+ Function-like class that performs eager computation of _get_locally_changed_files
+ for what looks like the default repo.
+
+ The rationale is the following:
+ - computing _get_locally_changed_files is relatively slow (~600ms)
+ - it's already done through an external command
+
+ So we do that in a background thread as soon as possible, so that by the
+ time we need the result, it has already been prefetched.
+ """
+
+ def __init__(self):
+ self.preloaded_repo = None
+ self.preloading_thread = None
+ self.preloaded_answer = None
+
+ def preload(self, repo):
+ """
+ Fire off preloading of get_locally_changed_files(repo).
+
+ For the sake of simplicity, there can be only one preloaded repo.
+ """
+ import threading
+ from pathlib import Path
+
+ if self.preloaded_repo is not None:
+ raise ValueError("Can only preload one repo")
+
+ self.preloaded_repo = Path(repo)
+
+ def preloading():
+ self.preloaded_answer = _get_locally_changed_files(self.preloaded_repo)
+
+ self.preloading_thread = threading.Thread(target=preloading, daemon=True)
+ self.preloading_thread.start()
+
+ @memoize
+ def __call__(self, repo):
+ if repo == self.preloaded_repo:
+ # A thread can be joined many times, but it's going to happen only
+ # once, thanks to @memoize.
+ self.preloading_thread.join()
+ return self.preloaded_answer
+ return _get_locally_changed_files(repo)
+
+
+get_locally_changed_files = PreloadedGetLocallyChangedFiles()
diff --git a/taskcluster/mach_commands.py b/taskcluster/mach_commands.py
@@ -14,11 +14,16 @@ import traceback
from functools import partial
import gecko_taskgraph.main
+from gecko_taskgraph.files_changed import get_locally_changed_files
from gecko_taskgraph.main import commands as taskgraph_commands
from mach.decorators import Command, CommandArgument, SubCommand
from mach.util import strtobool
from mozsystemmonitor.resourcemonitor import SystemResourceMonitor
+# We're likely going to need the result of get_locally_changed_files, and it
+# takes time to finish, so prefetch it as soon as possible.
+get_locally_changed_files.preload(os.getcwd())
+
def setup_logging(command_context, quiet=False, verbose=True):
"""