tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 934a439e0e1655d0ee64b28ce6cdfea7db3dd35b
parent 4b85427200b8777c3b5aa49f9804409bc9910d5a
Author: serge-sans-paille <sguelton@mozilla.com>
Date:   Tue, 16 Dec 2025 13:47:13 +0000

Bug 2002796 - Prefetch computation of gecko_taskgraph.files_changed.get_locally_changed_files r=jmaher,taskgraph-reviewers,jcristau

When get_locally_changed_files invokes git, it calls a subcommand that
takes ~700ms. This is quite slow and blocks the startup of several
taskgraph-related tasks.

We usually ask for the files changed in the main repo, so it may be worth
prefetching that value in a separate thread. GIL is not an issue here as
we invoke a subprocess.

Differential Revision: https://phabricator.services.mozilla.com/D274297

Diffstat:
Mtaskcluster/gecko_taskgraph/files_changed.py | 57++++++++++++++++++++++++++++++++++++++++++++++++++++++---
Mtaskcluster/mach_commands.py | 5+++++
2 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/taskcluster/gecko_taskgraph/files_changed.py b/taskcluster/gecko_taskgraph/files_changed.py @@ -12,10 +12,61 @@ from mozbuild.util import memoize from mozversioncontrol import InvalidRepoPath, get_repository_object -@memoize -def get_locally_changed_files(repo): +def _get_locally_changed_files(repo): try: vcs = get_repository_object(repo) - return set(vcs.get_outgoing_files("AM")) + s = set(vcs.get_outgoing_files("AM")) + return s except (InvalidRepoPath, CalledProcessError): return set() + + +class PreloadedGetLocallyChangedFiles: + """ + Function-like class that performs eager computation of _get_locally_changed_files + for what looks the default repo. + + The rationale is the following: + - computing _get_locally_changed_files is relatively slow (~600ms) + - it's already done through an external command + + So we do that in a background thread as soon as possible, so that at the + point when we need the result, it's already `prefetched'. + """ + + def __init__(self): + self.preloaded_repo = None + self.preloading_thread = None + self.preloaded_answer = None + + def preload(self, repo): + """ + Fire off preloading of get_locally_changed_files(repo). + + For the sake of simplicity, there can be only one preloaded repo. + """ + import threading + from pathlib import Path + + if self.preloaded_repo is not None: + raise ValueError("Can only preload one repo") + + self.preloaded_repo = Path(repo) + + def preloading(): + self.preloaded_answer = _get_locally_changed_files(self.preloaded_repo) + + self.preloading_thread = threading.Thread(target=preloading, daemon=True) + self.preloading_thread.start() + + @memoize + def __call__(self, repo): + if repo == self.preloaded_repo: + # A thread can be joined many times, but it's going to happen only + # once, thanks to @memoize. 
+ self.preloading_thread.join() + return self.preloaded_answer + return _get_locally_changed_files(repo) + + +get_locally_changed_files = PreloadedGetLocallyChangedFiles() diff --git a/taskcluster/mach_commands.py b/taskcluster/mach_commands.py @@ -14,11 +14,16 @@ import traceback from functools import partial import gecko_taskgraph.main +from gecko_taskgraph.files_changed import get_locally_changed_files from gecko_taskgraph.main import commands as taskgraph_commands from mach.decorators import Command, CommandArgument, SubCommand from mach.util import strtobool from mozsystemmonitor.resourcemonitor import SystemResourceMonitor +# We're likely going to need the result of get_locally_changed_files, and it +# takes time to finish, so prefetch it as soon as possible. +get_locally_changed_files.preload(os.getcwd()) + def setup_logging(command_context, quiet=False, verbose=True): """