tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 3b0c6ee9a2b23a1d8d7d31f323cb97132c39f856
parent 76eabb0febd4db7e9faf628147606c168ae38460
Author: Bastien Orivel <borivel@mozilla.com>
Date:   Tue,  6 Jan 2026 11:30:03 +0000

Bug 2008622 - Revert "Bug 2001388 - Don't query hg to get a list of changed files in the decision task". r=releng-reviewers,taskgraph-reviewers,jcristau

This reverts commit 712692c98db377c3224e4bd062794797404106d1.

This was causing serious performance issues on large commit ranges (up
to 1h+ to compute the list of changed files) and we don't have a good way
of making this faster without changing semantics for files-changed, so
let's just revert.

Differential Revision: https://phabricator.services.mozilla.com/D277986

Diffstat:
Mtaskcluster/gecko_taskgraph/decision.py | 13+++----------
Mtaskcluster/gecko_taskgraph/files_changed.py | 62++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtaskcluster/gecko_taskgraph/test/python.toml | 2++
Mtaskcluster/gecko_taskgraph/test/test_decision.py | 5+++--
Ataskcluster/gecko_taskgraph/test/test_files_changed.py | 64++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtaskcluster/gecko_taskgraph/util/hg.py | 13+++++++++++++
6 files changed, 147 insertions(+), 12 deletions(-)

diff --git a/taskcluster/gecko_taskgraph/decision.py b/taskcluster/gecko_taskgraph/decision.py @@ -26,6 +26,7 @@ from taskgraph.util.yaml import load_yaml from . import GECKO from .actions import render_actions_json +from .files_changed import get_changed_files from .parameters import get_app_version, get_version from .util.backstop import ANDROID_PERFTEST_BACKSTOP_INDEX, BACKSTOP_INDEX, is_backstop from .util.bugbug import push_schedules @@ -314,17 +315,9 @@ def get_decision_parameters(graph_config, options): GECKO, revision=parameters["head_rev"] ) - changed_files_since_base = set( - repo.get_changed_files( - rev=parameters["head_rev"], base=parameters["base_rev"] - ) + parameters["files_changed"] = sorted( + get_changed_files(parameters["head_repository"], parameters["head_rev"]) ) - if "try" in parameters["project"] and options["tasks_for"] == "hg-push": - parameters["files_changed"] = sorted( - set(repo.get_outgoing_files()) | changed_files_since_base - ) - else: - parameters["files_changed"] = sorted(changed_files_since_base) elif parameters["repository_type"] == "git": parameters["hg_branch"] = None diff --git a/taskcluster/gecko_taskgraph/files_changed.py b/taskcluster/gecko_taskgraph/files_changed.py @@ -6,11 +6,73 @@ Support for optimizing tasks based on the set of files that have changed. """ +import logging +import os from subprocess import CalledProcessError from mozbuild.util import memoize +from mozpack.path import join as join_path +from mozpack.path import match as mozpackmatch from mozversioncontrol import InvalidRepoPath, get_repository_object +from gecko_taskgraph import GECKO +from gecko_taskgraph.util.hg import get_json_pushchangedfiles + +logger = logging.getLogger(__name__) + + +@memoize +def get_changed_files(repository, revision): + """ + Get the set of files changed in the push headed by the given revision. + Responses are cached, so multiple calls with the same arguments are OK. 
+ """ + try: + return get_json_pushchangedfiles(repository, revision)["files"] + except KeyError: + # We shouldn't hit this error in CI. + if os.environ.get("MOZ_AUTOMATION"): + raise + + # We're likely on an unpublished commit, grab changed files from + # version control. + return get_locally_changed_files(GECKO) + + +def check(params, file_patterns): + """Determine whether any of the files changed in the indicated push to + https://hg.mozilla.org match any of the given file patterns.""" + repository = params.get("head_repository") + revision = params.get("head_rev") + if not repository or not revision: + logger.warning( + "Missing `head_repository` or `head_rev` parameters; " + "assuming all files have changed" + ) + return True + + changed_files = get_changed_files(repository, revision) + + if "comm_head_repository" in params: + repository = params.get("comm_head_repository") + revision = params.get("comm_head_rev") + if not revision: + logger.warning( + "Missing `comm_head_rev` parameters; " "assuming all files have changed" + ) + return True + + changed_files |= { + join_path("comm", file) for file in get_changed_files(repository, revision) + } + + for pattern in file_patterns: + for path in changed_files: + if mozpackmatch(path, pattern): + return True + + return False + def _get_locally_changed_files(repo): try: diff --git a/taskcluster/gecko_taskgraph/test/python.toml b/taskcluster/gecko_taskgraph/test/python.toml @@ -7,6 +7,8 @@ subsuite = "taskgraph" ["test_decision.py"] +["test_files_changed.py"] + ["test_main.py"] ["test_morph.py"] diff --git a/taskcluster/gecko_taskgraph/test/test_decision.py b/taskcluster/gecko_taskgraph/test/test_decision.py @@ -76,6 +76,7 @@ def test_write_artifact_yml(): @patch("gecko_taskgraph.decision.get_hg_revision_info") @patch("gecko_taskgraph.decision.get_hg_revision_branch") @patch("gecko_taskgraph.decision.get_repository") +@patch("gecko_taskgraph.decision.get_changed_files") @pytest.mark.parametrize( 
"extra_options,commit_msg,ttc,expected", ( @@ -131,6 +132,7 @@ def test_write_artifact_yml(): ), ) def test_get_decision_parameters( + mock_get_changed_files, mock_get_repository, mock_get_hg_revision_branch, mock_get_hg_revision_info, @@ -146,9 +148,8 @@ def test_get_decision_parameters( mock_repo = MagicMock() mock_repo.default_branch = "baseref" mock_repo.get_commit_message.return_value = commit_msg or "commit message" - mock_repo.get_outgoing_files.return_value = ["foo.txt", "bar/baz.md"] - mock_repo.get_changed_files.return_value = ["foo.txt", "bar/baz.md"] mock_get_repository.return_value = mock_repo + mock_get_changed_files.return_value = ["foo.txt", "bar/baz.md"] options.update(extra_options) contents = None diff --git a/taskcluster/gecko_taskgraph/test/test_files_changed.py b/taskcluster/gecko_taskgraph/test/test_files_changed.py @@ -0,0 +1,64 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ + +import unittest + +from mozunit import main + +from gecko_taskgraph import files_changed + +PARAMS = { + "head_repository": "https://hg.mozilla.org/mozilla-central", + "head_rev": "a14f88a9af7a", +} + +FILES_CHANGED = [ + "devtools/client/debugger/index.html", + "devtools/client/inspector/test/browser_inspector_highlighter-eyedropper-events.js", + "devtools/client/inspector/test/head.js", + "devtools/client/themes/rules.css", + "devtools/client/webconsole/test/browser_webconsole_output_06.js", + "devtools/server/actors/highlighters/eye-dropper.js", + "devtools/server/actors/object.js", + "docshell/base/nsDocShell.cpp", + "dom/tests/mochitest/general/test_contentViewer_overrideDPPX.html", + "taskcluster/scripts/builder/build-l10n.sh", +] + + +def test_get_changed_files(responses): + url = f"{PARAMS['head_repository']}/json-pushchangedfiles/{PARAMS['head_rev']}" + responses.add(responses.GET, url, status=200, json={"files": FILES_CHANGED}) + assert ( + sorted( + files_changed.get_changed_files( + PARAMS["head_repository"], PARAMS["head_rev"] + ) + ) + == FILES_CHANGED + ) + + +class TestCheck(unittest.TestCase): + def setUp(self): + files_changed.get_changed_files[ + PARAMS["head_repository"], PARAMS["head_rev"] + ] = FILES_CHANGED + + def tearDown(self): + files_changed.get_changed_files.clear() + + def test_check_no_params(self): + self.assertTrue(files_changed.check({}, ["ignored"])) + + def test_check_no_match(self): + self.assertFalse(files_changed.check(PARAMS, ["nosuch/**"])) + + def test_check_match(self): + self.assertTrue(files_changed.check(PARAMS, ["devtools/**"])) + + +if __name__ == "__main__": + main() diff --git a/taskcluster/gecko_taskgraph/util/hg.py b/taskcluster/gecko_taskgraph/util/hg.py @@ -93,6 +93,19 @@ def get_push_data(repository, project, push_id_start, push_id_end): return None +@memoize +def get_json_pushchangedfiles(repository, revision): + url = "{}/json-pushchangedfiles/{}".format(repository.rstrip("/"), revision) + 
logger.debug("Querying version control for metadata: %s", url) + + def get_pushchangedfiles(): + response = requests.get(url, timeout=60) + response.raise_for_status() + return response.json() + + return retry(get_pushchangedfiles, attempts=10, sleeptime=10) + + def get_hg_revision_branch(root, revision): """Given the parameters for a revision, find the hg_branch (aka relbranch) of the revision."""