commit 3b0c6ee9a2b23a1d8d7d31f323cb97132c39f856
parent 76eabb0febd4db7e9faf628147606c168ae38460
Author: Bastien Orivel <borivel@mozilla.com>
Date: Tue, 6 Jan 2026 11:30:03 +0000
Bug 2008622 - Revert "Bug 2001388 - Don't query hg to get a list of changed files in the decision task". r=releng-reviewers,taskgraph-reviewers,jcristau
This reverts commit 712692c98db377c3224e4bd062794797404106d1.
This was causing serious performance issues on large commit ranges (up
to 1h+ to compute the list of changed files), and we don't have a good way
of making this faster without changing the semantics of files-changed, so
let's just revert.
Differential Revision: https://phabricator.services.mozilla.com/D277986
Diffstat:
6 files changed, 147 insertions(+), 12 deletions(-)
diff --git a/taskcluster/gecko_taskgraph/decision.py b/taskcluster/gecko_taskgraph/decision.py
@@ -26,6 +26,7 @@ from taskgraph.util.yaml import load_yaml
from . import GECKO
from .actions import render_actions_json
+from .files_changed import get_changed_files
from .parameters import get_app_version, get_version
from .util.backstop import ANDROID_PERFTEST_BACKSTOP_INDEX, BACKSTOP_INDEX, is_backstop
from .util.bugbug import push_schedules
@@ -314,17 +315,9 @@ def get_decision_parameters(graph_config, options):
GECKO, revision=parameters["head_rev"]
)
- changed_files_since_base = set(
- repo.get_changed_files(
- rev=parameters["head_rev"], base=parameters["base_rev"]
- )
+ parameters["files_changed"] = sorted(
+ get_changed_files(parameters["head_repository"], parameters["head_rev"])
)
- if "try" in parameters["project"] and options["tasks_for"] == "hg-push":
- parameters["files_changed"] = sorted(
- set(repo.get_outgoing_files()) | changed_files_since_base
- )
- else:
- parameters["files_changed"] = sorted(changed_files_since_base)
elif parameters["repository_type"] == "git":
parameters["hg_branch"] = None
diff --git a/taskcluster/gecko_taskgraph/files_changed.py b/taskcluster/gecko_taskgraph/files_changed.py
@@ -6,11 +6,73 @@
Support for optimizing tasks based on the set of files that have changed.
"""
+import logging
+import os
from subprocess import CalledProcessError
from mozbuild.util import memoize
+from mozpack.path import join as join_path
+from mozpack.path import match as mozpackmatch
from mozversioncontrol import InvalidRepoPath, get_repository_object
+from gecko_taskgraph import GECKO
+from gecko_taskgraph.util.hg import get_json_pushchangedfiles
+
+logger = logging.getLogger(__name__)
+
+
+@memoize
+def get_changed_files(repository, revision):
+    """
+    Return the files changed in the push whose head is ``revision``.
+
+    The value is the "files" entry of the ``json-pushchangedfiles`` data
+    fetched for ``repository``. Calls are memoized, so asking again with the
+    same arguments is OK.
+
+    If that entry is missing, the KeyError propagates when MOZ_AUTOMATION is
+    set in the environment; otherwise the locally changed files of the GECKO
+    checkout are returned instead.
+    """
+    try:
+        push_info = get_json_pushchangedfiles(repository, revision)
+        return push_info["files"]
+    except KeyError:
+        # Outside of CI we are most likely looking at an unpublished commit,
+        # so fall back to asking version control for the changed files.
+        if not os.environ.get("MOZ_AUTOMATION"):
+            return get_locally_changed_files(GECKO)
+
+        # This error is not expected in CI; let it surface.
+        raise
+
+
+def check(params, file_patterns):
+    """Determine whether any of the files changed in the indicated push to
+    https://hg.mozilla.org match any of the given file patterns.
+
+    The changed files of ``head_repository``/``head_rev`` are looked up with
+    ``get_changed_files``. When ``comm_head_repository`` is present in
+    ``params``, the files changed at ``comm_head_rev`` are added as well, each
+    one prefixed with ``comm/``.
+
+    :param params: mapping that provides ``head_repository`` and ``head_rev``
+        and, optionally, ``comm_head_repository`` and ``comm_head_rev``.
+    :param file_patterns: iterable of path patterns understood by
+        ``mozpack.path.match``.
+    :return: True if any changed path matches any pattern, or if a required
+        revision parameter is missing (all files are then assumed to have
+        changed); False otherwise.
+    """
+    repository = params.get("head_repository")
+    revision = params.get("head_rev")
+    if not repository or not revision:
+        logger.warning(
+            "Missing `head_repository` or `head_rev` parameters; "
+            "assuming all files have changed"
+        )
+        return True
+
+    # Work on a private set: get_changed_files() is memoized, so the object it
+    # returns is shared between callers and must not be modified in place. It
+    # can also be a list (the JSON "files" entry), which has no `|=` with a set.
+    changed_files = set(get_changed_files(repository, revision))
+
+    if "comm_head_repository" in params:
+        repository = params.get("comm_head_repository")
+        revision = params.get("comm_head_rev")
+        if not revision:
+            logger.warning(
+                "Missing `comm_head_rev` parameters; assuming all files have changed"
+            )
+            return True
+
+        changed_files.update(
+            join_path("comm", file) for file in get_changed_files(repository, revision)
+        )
+
+    return any(
+        mozpackmatch(path, pattern)
+        for pattern in file_patterns
+        for path in changed_files
+    )
+
def _get_locally_changed_files(repo):
try:
diff --git a/taskcluster/gecko_taskgraph/test/python.toml b/taskcluster/gecko_taskgraph/test/python.toml
@@ -7,6 +7,8 @@ subsuite = "taskgraph"
["test_decision.py"]
+["test_files_changed.py"]
+
["test_main.py"]
["test_morph.py"]
diff --git a/taskcluster/gecko_taskgraph/test/test_decision.py b/taskcluster/gecko_taskgraph/test/test_decision.py
@@ -76,6 +76,7 @@ def test_write_artifact_yml():
@patch("gecko_taskgraph.decision.get_hg_revision_info")
@patch("gecko_taskgraph.decision.get_hg_revision_branch")
@patch("gecko_taskgraph.decision.get_repository")
+@patch("gecko_taskgraph.decision.get_changed_files")
@pytest.mark.parametrize(
"extra_options,commit_msg,ttc,expected",
(
@@ -131,6 +132,7 @@ def test_write_artifact_yml():
),
)
def test_get_decision_parameters(
+ mock_get_changed_files,
mock_get_repository,
mock_get_hg_revision_branch,
mock_get_hg_revision_info,
@@ -146,9 +148,8 @@ def test_get_decision_parameters(
mock_repo = MagicMock()
mock_repo.default_branch = "baseref"
mock_repo.get_commit_message.return_value = commit_msg or "commit message"
- mock_repo.get_outgoing_files.return_value = ["foo.txt", "bar/baz.md"]
- mock_repo.get_changed_files.return_value = ["foo.txt", "bar/baz.md"]
mock_get_repository.return_value = mock_repo
+ mock_get_changed_files.return_value = ["foo.txt", "bar/baz.md"]
options.update(extra_options)
contents = None
diff --git a/taskcluster/gecko_taskgraph/test/test_files_changed.py b/taskcluster/gecko_taskgraph/test/test_files_changed.py
@@ -0,0 +1,64 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import unittest
+
+from mozunit import main
+
+from gecko_taskgraph import files_changed
+
+PARAMS = {
+ "head_repository": "https://hg.mozilla.org/mozilla-central",
+ "head_rev": "a14f88a9af7a",
+}
+
+FILES_CHANGED = [
+ "devtools/client/debugger/index.html",
+ "devtools/client/inspector/test/browser_inspector_highlighter-eyedropper-events.js",
+ "devtools/client/inspector/test/head.js",
+ "devtools/client/themes/rules.css",
+ "devtools/client/webconsole/test/browser_webconsole_output_06.js",
+ "devtools/server/actors/highlighters/eye-dropper.js",
+ "devtools/server/actors/object.js",
+ "docshell/base/nsDocShell.cpp",
+ "dom/tests/mochitest/general/test_contentViewer_overrideDPPX.html",
+ "taskcluster/scripts/builder/build-l10n.sh",
+]
+
+
+def test_get_changed_files(responses):
+    # Serve a canned json-pushchangedfiles payload for the push under test,
+    # then check that get_changed_files() hands back exactly those files.
+    repository, revision = PARAMS["head_repository"], PARAMS["head_rev"]
+    url = f"{repository}/json-pushchangedfiles/{revision}"
+    responses.add(responses.GET, url, status=200, json={"files": FILES_CHANGED})
+
+    found = files_changed.get_changed_files(repository, revision)
+    assert sorted(found) == FILES_CHANGED
+
+
+class TestCheck(unittest.TestCase):
+    # Every test runs with the get_changed_files cache pre-seeded for PARAMS
+    # (presumably so that check() needs no network access); the cache is
+    # emptied again afterwards.
+    def setUp(self):
+        cache_key = (PARAMS["head_repository"], PARAMS["head_rev"])
+        files_changed.get_changed_files[cache_key] = FILES_CHANGED
+
+    def tearDown(self):
+        files_changed.get_changed_files.clear()
+
+    def test_check_no_params(self):
+        # Without head_repository/head_rev, check() assumes everything changed.
+        outcome = files_changed.check({}, ["ignored"])
+        self.assertTrue(outcome)
+
+    def test_check_no_match(self):
+        # No path in FILES_CHANGED lives under nosuch/.
+        outcome = files_changed.check(PARAMS, ["nosuch/**"])
+        self.assertFalse(outcome)
+
+    def test_check_match(self):
+        # Several paths in FILES_CHANGED live under devtools/.
+        outcome = files_changed.check(PARAMS, ["devtools/**"])
+        self.assertTrue(outcome)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/taskcluster/gecko_taskgraph/util/hg.py b/taskcluster/gecko_taskgraph/util/hg.py
@@ -93,6 +93,19 @@ def get_push_data(repository, project, push_id_start, push_id_end):
return None
+@memoize
+def get_json_pushchangedfiles(repository, revision):
+    """Fetch the ``json-pushchangedfiles`` data of ``revision`` from
+    ``repository`` and return the decoded JSON body.
+
+    A trailing slash on ``repository`` is ignored when building the URL. The
+    request is issued through ``retry`` (10 attempts, sleeptime of 10) and any
+    HTTP error status is raised by ``raise_for_status``. Calls are memoized.
+    """
+    base_url = repository.rstrip("/")
+    url = "{}/json-pushchangedfiles/{}".format(base_url, revision)
+    logger.debug("Querying version control for metadata: %s", url)
+
+    def get_pushchangedfiles():
+        # One attempt: a 60 second timeout, failing on any HTTP error status.
+        resp = requests.get(url, timeout=60)
+        resp.raise_for_status()
+        return resp.json()
+
+    return retry(get_pushchangedfiles, attempts=10, sleeptime=10)
+
+
def get_hg_revision_branch(root, revision):
"""Given the parameters for a revision, find the hg_branch (aka
relbranch) of the revision."""