commit 350594fefb744453f81d77784ffb687f1a494c8e
parent f1ee43069142169608c5f53fd04c4015de9357f1
Author: Alex Hochheiden <ahochheiden@mozilla.com>
Date: Mon, 8 Dec 2025 19:59:08 +0000
Bug 2003373 - Add validation for toolchain resources in `taskgraph` sparse profile r=nalexander,ahal,taskgraph-reviewers
Differential Revision: https://phabricator.services.mozilla.com/D274658
Diffstat:
4 files changed, 114 insertions(+), 0 deletions(-)
diff --git a/taskcluster/docs/attributes.rst b/taskcluster/docs/attributes.rst
@@ -252,6 +252,12 @@ toolchain-artifact
==================
For toolchain jobs, this is the path to the artifact for that toolchain.
+toolchain-resources
+===================
+For toolchain jobs, this is the list of paths that contribute to the toolchain's
+digest hash. These include the build script, any resources specified in the task
+definition, and the tooltool manifest (if present).
+
toolchain-extract
=================
Control whether toolchain should be automatically extracted after download.
diff --git a/taskcluster/gecko_taskgraph/transforms/job/toolchain.py b/taskcluster/gecko_taskgraph/transforms/job/toolchain.py
@@ -83,6 +83,9 @@ def get_digest_data(config, run, taskdesc):
if tooltool_manifest:
files.append(tooltool_manifest)
+ # Store resources as an attribute for verification
+ taskdesc.setdefault("attributes", {})["toolchain-resources"] = sorted(files)
+
# Accumulate dependency hashes for index generation.
data = [hash_paths(GECKO, files)]
diff --git a/taskcluster/gecko_taskgraph/util/sparse_profiles.py b/taskcluster/gecko_taskgraph/util/sparse_profiles.py
@@ -0,0 +1,68 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import functools
+from pathlib import Path
+
+from gecko_taskgraph import GECKO
+
+
+@functools.cache
+def _get_taskgraph_sparse_profile():
+ """
+ Parse the taskgraph sparse profile and return the paths and globs it includes.
+ """
+
+ # We need this nested function to handle %include directives recursively
+ def parse(profile_path):
+ paths = set()
+ globs = set()
+
+ full_path = Path(GECKO) / profile_path
+ if not full_path.exists():
+ raise FileNotFoundError(
+ f"Sparse profile '{full_path.stem}' not found at {full_path}"
+ )
+
+ for raw_line in full_path.read_text().splitlines():
+ line = raw_line.strip()
+ if not line or line.startswith("#") or line.startswith("["):
+ continue
+ if line.startswith("%include "):
+ included_profile = line[len("%include ") :].strip()
+ included_paths, included_globs = parse(included_profile)
+ paths.update(included_paths)
+ globs.update(included_globs)
+ elif line.startswith("path:"):
+ path = line[len("path:") :].strip()
+ paths.add(Path(path))
+ elif line.startswith("glob:"):
+ glob = line[len("glob:") :].strip()
+ globs.add(glob)
+
+ return paths, globs
+
+ return parse("build/sparse-profiles/taskgraph")
+
+
+@functools.cache
+def is_path_covered_by_taskgraph_sparse_profile(path):
+ """
+ Check if a given path would be included in the taskgraph sparse checkout.
+ """
+ profile_paths, profile_globs = _get_taskgraph_sparse_profile()
+ path = Path(path)
+
+ for profile_path in profile_paths:
+ if path == profile_path or profile_path in path.parents:
+ return True
+
+ # Path.match requires at least one directory for ** patterns to match
+ # root-level files, so we prepend a fake parent directory
+ path_with_parent = Path("_", path)
+ for pattern in profile_globs:
+ if path_with_parent.match(pattern):
+ return True
+
+ return False
diff --git a/taskcluster/gecko_taskgraph/util/verify.py b/taskcluster/gecko_taskgraph/util/verify.py
@@ -23,6 +23,9 @@ from gecko_taskgraph.util.attributes import (
RUN_ON_PROJECT_ALIASES,
)
from gecko_taskgraph.util.constants import TEST_KINDS
+from gecko_taskgraph.util.sparse_profiles import (
+ is_path_covered_by_taskgraph_sparse_profile,
+)
logger = logging.getLogger(__name__)
doc_base_path = os.path.join(GECKO, "taskcluster", "docs")
@@ -298,6 +301,40 @@ def verify_required_signoffs(task, taskgraph, scratch_pad, graph_config, paramet
@verifications.add("full_task_graph")
+def verify_toolchain_resources_in_sparse_profile(
+ task, taskgraph, scratch_pad, graph_config, parameters
+):
+ """
+ Verify that all toolchain resources are covered by the taskgraph sparse profile.
+ If not, the decision task's sparse checkout won't have these files,
+ causing incorrect hashes and breaking 'mach bootstrap' for developers.
+ """
+ if task is not None:
+ if task.kind != "toolchain":
+ return
+ resources = task.attributes.get("toolchain-resources", [])
+ uncovered = [
+ f for f in resources if not is_path_covered_by_taskgraph_sparse_profile(f)
+ ]
+ if uncovered:
+ uncovered_list = "\n".join(f" path:{path}" for path in uncovered)
+ scratch_pad.setdefault("errors", []).append(
+ f"Toolchain '{task.label}' has resources not covered "
+ f"by the taskgraph sparse profile.\n"
+ f"Uncovered resources:\n{uncovered_list}"
+ )
+ else:
+ errors = scratch_pad.get("errors", [])
+ if errors:
+ raise Exception(
+ "Found toolchain resource(s) not covered by taskgraph sparse profile.\n"
+ "This will cause incorrect hashes in the decision task.\n\n"
+ + "\n\n".join(errors)
+ + "\n\nTo fix, add the above path(s) to 'build/sparse-profiles/taskgraph'."
+ )
+
+
+@verifications.add("full_task_graph")
def verify_aliases(task, taskgraph, scratch_pad, graph_config, parameters):
"""
This function verifies that aliases are not reused.