commit 712692c98db377c3224e4bd062794797404106d1
parent 77cd2bd4edabe1d58c087483bf725e198014ba81
Author: Bastien Orivel <borivel@mozilla.com>
Date: Tue, 25 Nov 2025 14:52:59 +0000
Bug 2001388 - Don't query hg to get a list of changed files in the decision task. r=releng-reviewers,taskgraph-reviewers,ahal
This should (given very unscientific observations of a few tasks) save between 10 and 40s on every decision task because hgmo is so slow to respond and we already have the data we need in the checkout to get that list.
The one thing worth noting is that on try we can't just use the files changed in the current push, because in that case hgmo was returning all of the files changed since the first non-draft ancestor. Thankfully that was very easy to fix because that is exactly what the `get_outgoing_files` method returns.
Differential Revision: https://phabricator.services.mozilla.com/D273447
Diffstat:
6 files changed, 12 insertions(+), 147 deletions(-)
diff --git a/taskcluster/gecko_taskgraph/decision.py b/taskcluster/gecko_taskgraph/decision.py
@@ -26,7 +26,6 @@ from taskgraph.util.yaml import load_yaml
from . import GECKO
from .actions import render_actions_json
-from .files_changed import get_changed_files
from .parameters import get_app_version, get_version
from .util.backstop import ANDROID_PERFTEST_BACKSTOP_INDEX, BACKSTOP_INDEX, is_backstop
from .util.bugbug import push_schedules
@@ -315,9 +314,17 @@ def get_decision_parameters(graph_config, options):
GECKO, revision=parameters["head_rev"]
)
- parameters["files_changed"] = sorted(
- get_changed_files(parameters["head_repository"], parameters["head_rev"])
+ changed_files_since_base = set(
+ repo.get_changed_files(
+ rev=parameters["head_rev"], base=parameters["base_rev"]
+ )
)
+ if "try" in parameters["project"] and options["tasks_for"] == "hg-push":
+ parameters["files_changed"] = sorted(
+ set(repo.get_outgoing_files()) | changed_files_since_base
+ )
+ else:
+ parameters["files_changed"] = sorted(changed_files_since_base)
elif parameters["repository_type"] == "git":
parameters["hg_branch"] = None
diff --git a/taskcluster/gecko_taskgraph/files_changed.py b/taskcluster/gecko_taskgraph/files_changed.py
@@ -6,73 +6,11 @@
Support for optimizing tasks based on the set of files that have changed.
"""
-import logging
-import os
from subprocess import CalledProcessError
from mozbuild.util import memoize
-from mozpack.path import join as join_path
-from mozpack.path import match as mozpackmatch
from mozversioncontrol import InvalidRepoPath, get_repository_object
-from gecko_taskgraph import GECKO
-from gecko_taskgraph.util.hg import get_json_pushchangedfiles
-
-logger = logging.getLogger(__name__)
-
-
-@memoize
-def get_changed_files(repository, revision):
- """
- Get the set of files changed in the push headed by the given revision.
- Responses are cached, so multiple calls with the same arguments are OK.
- """
- try:
- return get_json_pushchangedfiles(repository, revision)["files"]
- except KeyError:
- # We shouldn't hit this error in CI.
- if os.environ.get("MOZ_AUTOMATION"):
- raise
-
- # We're likely on an unpublished commit, grab changed files from
- # version control.
- return get_locally_changed_files(GECKO)
-
-
-def check(params, file_patterns):
- """Determine whether any of the files changed in the indicated push to
- https://hg.mozilla.org match any of the given file patterns."""
- repository = params.get("head_repository")
- revision = params.get("head_rev")
- if not repository or not revision:
- logger.warning(
- "Missing `head_repository` or `head_rev` parameters; "
- "assuming all files have changed"
- )
- return True
-
- changed_files = get_changed_files(repository, revision)
-
- if "comm_head_repository" in params:
- repository = params.get("comm_head_repository")
- revision = params.get("comm_head_rev")
- if not revision:
- logger.warning(
- "Missing `comm_head_rev` parameters; " "assuming all files have changed"
- )
- return True
-
- changed_files |= {
- join_path("comm", file) for file in get_changed_files(repository, revision)
- }
-
- for pattern in file_patterns:
- for path in changed_files:
- if mozpackmatch(path, pattern):
- return True
-
- return False
-
@memoize
def get_locally_changed_files(repo):
diff --git a/taskcluster/gecko_taskgraph/test/python.toml b/taskcluster/gecko_taskgraph/test/python.toml
@@ -7,8 +7,6 @@ subsuite = "taskgraph"
["test_decision.py"]
-["test_files_changed.py"]
-
["test_main.py"]
["test_morph.py"]
diff --git a/taskcluster/gecko_taskgraph/test/test_decision.py b/taskcluster/gecko_taskgraph/test/test_decision.py
@@ -76,7 +76,6 @@ def test_write_artifact_yml():
@patch("gecko_taskgraph.decision.get_hg_revision_info")
@patch("gecko_taskgraph.decision.get_hg_revision_branch")
@patch("gecko_taskgraph.decision.get_repository")
-@patch("gecko_taskgraph.decision.get_changed_files")
@pytest.mark.parametrize(
"extra_options,commit_msg,ttc,expected",
(
@@ -132,7 +131,6 @@ def test_write_artifact_yml():
),
)
def test_get_decision_parameters(
- mock_get_changed_files,
mock_get_repository,
mock_get_hg_revision_branch,
mock_get_hg_revision_info,
@@ -148,8 +146,9 @@ def test_get_decision_parameters(
mock_repo = MagicMock()
mock_repo.default_branch = "baseref"
mock_repo.get_commit_message.return_value = commit_msg or "commit message"
+ mock_repo.get_outgoing_files.return_value = ["foo.txt", "bar/baz.md"]
+ mock_repo.get_changed_files.return_value = ["foo.txt", "bar/baz.md"]
mock_get_repository.return_value = mock_repo
- mock_get_changed_files.return_value = ["foo.txt", "bar/baz.md"]
options.update(extra_options)
contents = None
diff --git a/taskcluster/gecko_taskgraph/test/test_files_changed.py b/taskcluster/gecko_taskgraph/test/test_files_changed.py
@@ -1,64 +0,0 @@
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-
-import unittest
-
-from mozunit import main
-
-from gecko_taskgraph import files_changed
-
-PARAMS = {
- "head_repository": "https://hg.mozilla.org/mozilla-central",
- "head_rev": "a14f88a9af7a",
-}
-
-FILES_CHANGED = [
- "devtools/client/debugger/index.html",
- "devtools/client/inspector/test/browser_inspector_highlighter-eyedropper-events.js",
- "devtools/client/inspector/test/head.js",
- "devtools/client/themes/rules.css",
- "devtools/client/webconsole/test/browser_webconsole_output_06.js",
- "devtools/server/actors/highlighters/eye-dropper.js",
- "devtools/server/actors/object.js",
- "docshell/base/nsDocShell.cpp",
- "dom/tests/mochitest/general/test_contentViewer_overrideDPPX.html",
- "taskcluster/scripts/builder/build-l10n.sh",
-]
-
-
-def test_get_changed_files(responses):
- url = f"{PARAMS['head_repository']}/json-pushchangedfiles/{PARAMS['head_rev']}"
- responses.add(responses.GET, url, status=200, json={"files": FILES_CHANGED})
- assert (
- sorted(
- files_changed.get_changed_files(
- PARAMS["head_repository"], PARAMS["head_rev"]
- )
- )
- == FILES_CHANGED
- )
-
-
-class TestCheck(unittest.TestCase):
- def setUp(self):
- files_changed.get_changed_files[
- PARAMS["head_repository"], PARAMS["head_rev"]
- ] = FILES_CHANGED
-
- def tearDown(self):
- files_changed.get_changed_files.clear()
-
- def test_check_no_params(self):
- self.assertTrue(files_changed.check({}, ["ignored"]))
-
- def test_check_no_match(self):
- self.assertFalse(files_changed.check(PARAMS, ["nosuch/**"]))
-
- def test_check_match(self):
- self.assertTrue(files_changed.check(PARAMS, ["devtools/**"]))
-
-
-if __name__ == "__main__":
- main()
diff --git a/taskcluster/gecko_taskgraph/util/hg.py b/taskcluster/gecko_taskgraph/util/hg.py
@@ -93,19 +93,6 @@ def get_push_data(repository, project, push_id_start, push_id_end):
return None
-@memoize
-def get_json_pushchangedfiles(repository, revision):
- url = "{}/json-pushchangedfiles/{}".format(repository.rstrip("/"), revision)
- logger.debug("Querying version control for metadata: %s", url)
-
- def get_pushchangedfiles():
- response = requests.get(url, timeout=60)
- response.raise_for_status()
- return response.json()
-
- return retry(get_pushchangedfiles, attempts=10, sleeptime=10)
-
-
def get_hg_revision_branch(root, revision):
"""Given the parameters for a revision, find the hg_branch (aka
relbranch) of the revision."""