tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 350594fefb744453f81d77784ffb687f1a494c8e
parent f1ee43069142169608c5f53fd04c4015de9357f1
Author: Alex Hochheiden <ahochheiden@mozilla.com>
Date:   Mon,  8 Dec 2025 19:59:08 +0000

Bug 2003373 - Add validation for toolchain resources in `taskgraph` sparse profile r=nalexander,ahal,taskgraph-reviewers

Differential Revision: https://phabricator.services.mozilla.com/D274658

Diffstat:
M taskcluster/docs/attributes.rst | 6 ++++++
M taskcluster/gecko_taskgraph/transforms/job/toolchain.py | 3 +++
A taskcluster/gecko_taskgraph/util/sparse_profiles.py | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M taskcluster/gecko_taskgraph/util/verify.py | 37 +++++++++++++++++++++++++++++++++++++
4 files changed, 114 insertions(+), 0 deletions(-)

diff --git a/taskcluster/docs/attributes.rst b/taskcluster/docs/attributes.rst
@@ -252,6 +252,12 @@ toolchain-artifact
 ==================
 For toolchain jobs, this is the path to the artifact for that toolchain.
 
+toolchain-resources
+===================
+For toolchain jobs, this is the list of paths that contribute to the toolchain's
+digest hash. These include the build script, any resources specified in the task
+definition, and the tooltool manifest (if present).
+
 toolchain-extract
 =================
 Control whether toolchain should be automatically extracted after download.
diff --git a/taskcluster/gecko_taskgraph/transforms/job/toolchain.py b/taskcluster/gecko_taskgraph/transforms/job/toolchain.py
@@ -83,6 +83,9 @@ def get_digest_data(config, run, taskdesc):
     if tooltool_manifest:
         files.append(tooltool_manifest)
 
+    # Store resources as an attribute for verification
+    taskdesc.setdefault("attributes", {})["toolchain-resources"] = sorted(files)
+
     # Accumulate dependency hashes for index generation.
     data = [hash_paths(GECKO, files)]
 
diff --git a/taskcluster/gecko_taskgraph/util/sparse_profiles.py b/taskcluster/gecko_taskgraph/util/sparse_profiles.py
@@ -0,0 +1,68 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import functools
+from pathlib import Path
+
+from gecko_taskgraph import GECKO
+
+
+@functools.cache
+def _get_taskgraph_sparse_profile():
+    """
+    Parse the taskgraph sparse profile and return the paths and globs it includes.
+    """
+
+    # We need this nested function to handle %include directives recursively
+    def parse(profile_path):
+        paths = set()
+        globs = set()
+
+        full_path = Path(GECKO) / profile_path
+        if not full_path.exists():
+            raise FileNotFoundError(
+                f"Sparse profile '{full_path.stem}' not found at {full_path}"
+            )
+
+        for raw_line in full_path.read_text().splitlines():
+            line = raw_line.strip()
+            if not line or line.startswith("#") or line.startswith("["):
+                continue
+            if line.startswith("%include "):
+                included_profile = line[len("%include ") :].strip()
+                included_paths, included_globs = parse(included_profile)
+                paths.update(included_paths)
+                globs.update(included_globs)
+            elif line.startswith("path:"):
+                path = line[len("path:") :].strip()
+                paths.add(Path(path))
+            elif line.startswith("glob:"):
+                glob = line[len("glob:") :].strip()
+                globs.add(glob)
+
+        return paths, globs
+
+    return parse("build/sparse-profiles/taskgraph")
+
+
+@functools.cache
+def is_path_covered_by_taskgraph_sparse_profile(path):
+    """
+    Check if a given path would be included in the taskgraph sparse checkout.
+    """
+    profile_paths, profile_globs = _get_taskgraph_sparse_profile()
+    path = Path(path)
+
+    for profile_path in profile_paths:
+        if path == profile_path or profile_path in path.parents:
+            return True
+
+    # Path.match requires at least one directory for ** patterns to match
+    # root-level files, so we prepend a fake parent directory
+    path_with_parent = Path("_", path)
+    for pattern in profile_globs:
+        if path_with_parent.match(pattern):
+            return True
+
+    return False
diff --git a/taskcluster/gecko_taskgraph/util/verify.py b/taskcluster/gecko_taskgraph/util/verify.py
@@ -23,6 +23,9 @@ from gecko_taskgraph.util.attributes import (
     RUN_ON_PROJECT_ALIASES,
 )
 from gecko_taskgraph.util.constants import TEST_KINDS
+from gecko_taskgraph.util.sparse_profiles import (
+    is_path_covered_by_taskgraph_sparse_profile,
+)
 
 logger = logging.getLogger(__name__)
 doc_base_path = os.path.join(GECKO, "taskcluster", "docs")
@@ -298,6 +301,40 @@ def verify_required_signoffs(task, taskgraph, scratch_pad, graph_config, paramet
 
 
 @verifications.add("full_task_graph")
+def verify_toolchain_resources_in_sparse_profile(
+    task, taskgraph, scratch_pad, graph_config, parameters
+):
+    """
+    Verify that all toolchain resources are covered by the taskgraph sparse profile.
+    If not, the decision task's sparse checkout won't have these files,
+    causing incorrect hashes and breaking 'mach bootstrap' for developers.
+    """
+    if task is not None:
+        if task.kind != "toolchain":
+            return
+        resources = task.attributes.get("toolchain-resources", [])
+        uncovered = [
+            f for f in resources if not is_path_covered_by_taskgraph_sparse_profile(f)
+        ]
+        if uncovered:
+            uncovered_list = "\n".join(f" path:{path}" for path in uncovered)
+            scratch_pad.setdefault("errors", []).append(
+                f"Toolchain '{task.label}' has resources not covered "
+                f"by the taskgraph sparse profile.\n"
+                f"Uncovered resources:\n{uncovered_list}"
+            )
+    else:
+        errors = scratch_pad.get("errors", [])
+        if errors:
+            raise Exception(
+                "Found toolchain resource(s) not covered by taskgraph sparse profile.\n"
+                "This will cause incorrect hashes in the decision task.\n\n"
+                + "\n\n".join(errors)
+                + "\n\nTo fix, add the above path(s) to 'build/sparse-profiles/taskgraph'."
+            )
+
+
+@verifications.add("full_task_graph")
 def verify_aliases(task, taskgraph, scratch_pad, graph_config, parameters):
     """
     This function verifies that aliases are not reused.