# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.


import datetime
import logging
import os
import re
import sys
import warnings

import attr
from taskcluster.utils import fromNow
from taskgraph.util.keyed_by import evaluate_keyed_by
from taskgraph.util.treeherder import join_symbol
from taskgraph.util.verify import VerificationSequence

from gecko_taskgraph import GECKO
from gecko_taskgraph.util.attributes import (
    ALL_PROJECTS,
    RELEASE_PROJECTS,
    RUN_ON_PROJECT_ALIASES,
)
from gecko_taskgraph.util.constants import TEST_KINDS
from gecko_taskgraph.util.sparse_profiles import (
    is_path_covered_by_taskgraph_sparse_profile,
)

logger = logging.getLogger(__name__)
doc_base_path = os.path.join(GECKO, "taskcluster", "docs")


verifications = VerificationSequence()


@attr.s(frozen=True)
class DocPaths:
    """Registry of directories that may contain taskgraph documentation files."""

    # NOTE: frozen=True only prevents rebinding the attribute; the list object
    # itself stays mutable, which is what add() relies on.
    _paths = attr.ib(factory=list)

    def get_files(self, filename):
        """Return every existing file named ``filename`` across registered paths."""
        rv = []
        for p in self._paths:
            doc_path = os.path.join(p, filename)
            if os.path.exists(doc_path):
                rv.append(doc_path)
        return rv

    def add(self, path):
        """
        Projects that make use of Firefox's taskgraph can extend it with
        their own task kinds by registering additional paths for documentation.
        documentation_paths.add() needs to be called by the project's Taskgraph
        registration function. See taskgraph.config.
        """
        self._paths.append(path)


documentation_paths = DocPaths()
documentation_paths.add(doc_base_path)


def verify_docs(filename, identifiers, appearing_as):
    """
    Look for identifiers of the type appearing_as in the files
    returned by documentation_paths.get_files(). Firefox will have
    a single file in a list, but projects such as Thunderbird can have
    documentation in another location and may return multiple files.

    Args:
        filename: documentation file name, relative to each registered path.
        identifiers: iterable of names that must be documented.
        appearing_as: either "inline-literal" (``name``) or "heading".

    Raises:
        Exception: if an identifier is missing from the documentation, or
            if ``appearing_as`` is not a recognized style.
    """
    # We ignore identifiers starting with '_' for the sake of tests.
    # Strings starting with "_" are ignored for doc verification
    # hence they can be used for faking test values.
    documented = [
        identifier for identifier in identifiers if not identifier.startswith("_")
    ]

    doc_files = documentation_paths.get_files(filename)
    # Use context managers so each doc file is closed promptly instead of
    # relying on garbage collection.
    doctext = ""
    for doc_file in doc_files:
        with open(doc_file) as f:
            doctext += f.read()

    if appearing_as == "inline-literal":
        expression_list = ["``" + identifier + "``" for identifier in documented]
    elif appearing_as == "heading":
        # A heading is the identifier on a line of its own, followed either by
        # a '-' underline or directly by a paragraph of text.
        # NOTE(review): identifiers are not re.escape()d; regex metacharacters
        # in an identifier would be interpreted. Appears benign for current
        # kind/attribute names — confirm if identifiers ever contain '.', '+'.
        expression_list = [
            "\n" + identifier + "\n(?:(?:(?:-+\n)+)|(?:(?:.+\n)+))"
            for identifier in documented
        ]
    else:
        raise Exception(f"appearing_as = `{appearing_as}` not defined")

    # Pair each expression with the identifier it was built from. Zipping
    # against the unfiltered input would misattribute failures whenever an
    # '_'-prefixed identifier was skipped above.
    for expression, identifier in zip(expression_list, documented):
        match_group = re.search(expression, doctext)
        if not match_group:
            raise Exception(
                f"{appearing_as}: `{identifier}` missing from doc file: `{filename}`"
            )


@verifications.add("initial")
def verify_run_using():
    """Ensure every registered `run-using` implementation is documented."""
    from gecko_taskgraph.transforms.job import registry

    verify_docs(
        filename="transforms/job.rst",
        identifiers=registry.keys(),
        appearing_as="inline-literal",
    )


@verifications.add("parameters")
def verify_parameters_docs(parameters):
    """Ensure every parameter is documented (only enforced in strict mode)."""
    if not parameters.strict:
        return

    parameters_dict = dict(**parameters)
    verify_docs(
        filename="parameters.rst",
        identifiers=list(parameters_dict),
        appearing_as="inline-literal",
    )


@verifications.add("kinds")
def verify_kinds_docs(kinds):
    """Ensure every kind has a heading in kinds.rst."""
    verify_docs(filename="kinds.rst", identifiers=kinds.keys(), appearing_as="heading")


@verifications.add("full_task_set")
def verify_attributes(task, taskgraph, scratch_pad, graph_config, parameters):
    """
    Collect every attribute name used by any task, then (on the final
    ``task is None`` call) verify each one is documented in attributes.rst.
    """
    if task is None:
        verify_docs(
            filename="attributes.rst",
            identifiers=list(scratch_pad["attribute_set"]),
            appearing_as="heading",
        )
        return
    scratch_pad.setdefault("attribute_set", set()).update(task.attributes.keys())


@verifications.add("full_task_graph")
def verify_task_graph_symbol(task, taskgraph, scratch_pad, graph_config, parameters):
    """
    This function verifies that tuple
    (collection.keys(), machine.platform, groupSymbol, symbol) is unique
    for a target task graph.
    """
    if task is None:
        return
    task_dict = task.task
    # Tasks without treeherder config have nothing to verify.
    if "extra" in task_dict:
        extra = task_dict["extra"]
        if "treeherder" in extra:
            treeherder = extra["treeherder"]

            collection_keys = tuple(sorted(treeherder.get("collection", {}).keys()))
            if len(collection_keys) != 1:
                raise Exception(
                    f"Task {task.label} can't be in multiple treeherder collections "
                    f"(the part of the platform after `/`): {collection_keys}"
                )
            platform = treeherder.get("machine", {}).get("platform")
            group_symbol = treeherder.get("groupSymbol")
            symbol = treeherder.get("symbol")

            key = (platform, collection_keys[0], group_symbol, symbol)
            if key in scratch_pad:
                raise Exception(
                    "Duplicate treeherder platform and symbol in tasks "
                    "`{}`and `{}`: {} {}".format(
                        task.label,
                        scratch_pad[key],
                        f"{platform}/{collection_keys[0]}",
                        join_symbol(group_symbol, symbol),
                    )
                )
            else:
                scratch_pad[key] = task.label


@verifications.add("full_task_graph")
def verify_trust_domain_v2_routes(
    task, taskgraph, scratch_pad, graph_config, parameters
):
    """
    This function ensures that any two tasks have distinct ``index.{trust-domain}.v2`` routes.
    """
    if task is None:
        return
    route_prefix = "index.{}.v2".format(graph_config["trust-domain"])
    task_dict = task.task
    routes = task_dict.get("routes", [])

    for route in routes:
        if route.startswith(route_prefix):
            if route in scratch_pad:
                raise Exception(
                    f"conflict between {task.label}:{scratch_pad[route]} for route: {route}"
                )
            else:
                scratch_pad[route] = task.label


@verifications.add("full_task_graph")
def verify_routes_notification_filters(
    task, taskgraph, scratch_pad, graph_config, parameters
):
    """
    This function ensures that only understood filters for notifications are
    specified.

    See: https://firefox-ci-tc.services.mozilla.com/docs/manual/using/task-notifications
    """
    if task is None:
        return
    route_prefix = "notify."
    valid_filters = (
        "on-any",
        "on-completed",
        "on-defined",
        "on-failed",
        "on-exception",
        "on-pending",
        "on-resolved",
        "on-running",
        "on-transition",
    )
    task_dict = task.task
    routes = task_dict.get("routes", [])

    for route in routes:
        if route.startswith(route_prefix):
            # The filter is the last dot-separated component of the route.
            route_filter = route.split(".")[-1]
            if route_filter not in valid_filters:
                raise Exception(
                    f"{task.label} has invalid notification filter ({route_filter})"
                )
            if route_filter == "on-any":
                warnings.warn(
                    DeprecationWarning(
                        f"notification filter '{route_filter}' is deprecated. Use "
                        "'on-transition' or 'on-resolved'."
                    )
                )


@verifications.add("full_task_graph")
def verify_dependency_tiers(task, taskgraph, scratch_pad, graph_config, parameters):
    """
    Verify that no task depends on a task of a numerically higher (less
    stable) treeherder tier. Tiers are collected per-task, then checked on
    the final ``task is None`` call.
    """
    tiers = scratch_pad
    if task is not None:
        # Tasks with no explicit tier sort last (sys.maxsize = "unknown").
        tiers[task.label] = (
            task.task.get("extra", {}).get("treeherder", {}).get("tier", sys.maxsize)
        )
    else:

        def printable_tier(tier):
            if tier == sys.maxsize:
                return "unknown"
            return tier

        for current_task in taskgraph.tasks.values():
            tier = tiers[current_task.label]
            for d in current_task.dependencies.values():
                # always-optimized and dummy tasks never actually run, so
                # their tier is irrelevant.
                if taskgraph[d].task.get("workerType") == "always-optimized":
                    continue
                if "dummy" in taskgraph[d].kind:
                    continue
                if tier < tiers[d]:
                    raise Exception(
                        f"{current_task.label} (tier {printable_tier(tier)}) cannot depend on {d} (tier {printable_tier(tiers[d])})"
                    )


@verifications.add("full_task_graph")
def verify_required_signoffs(task, taskgraph, scratch_pad, graph_config, parameters):
    """
    Task with required signoffs can't be dependencies of tasks with less
    required signoffs.
    """
    all_required_signoffs = scratch_pad
    if task is not None:
        all_required_signoffs[task.label] = set(
            task.attributes.get("required_signoffs", [])
        )
    else:

        def printable_signoff(signoffs):
            if len(signoffs) == 1:
                return "required signoff {}".format(*signoffs)
            if signoffs:
                return "required signoffs {}".format(", ".join(signoffs))
            return "no required signoffs"

        for current_task in taskgraph.tasks.values():
            required_signoffs = all_required_signoffs[current_task.label]
            for d in current_task.dependencies.values():
                # Set '<' is proper-subset: fail when the dependency requires
                # strictly more signoffs than the dependent task.
                if required_signoffs < all_required_signoffs[d]:
                    raise Exception(
                        f"{current_task.label} ({printable_signoff(required_signoffs)}) cannot depend on {d} ({printable_signoff(all_required_signoffs[d])})"
                    )


@verifications.add("full_task_graph")
def verify_toolchain_resources_in_sparse_profile(
    task, taskgraph, scratch_pad, graph_config, parameters
):
    """
    Verify that all toolchain resources are covered by the taskgraph sparse profile.
    If not, the decision task's sparse checkout won't have these files,
    causing incorrect hashes and breaking 'mach bootstrap' for developers.
    """
    if task is not None:
        if task.kind != "toolchain":
            return
        resources = task.attributes.get("toolchain-resources", [])
        uncovered = [
            f for f in resources if not is_path_covered_by_taskgraph_sparse_profile(f)
        ]
        if uncovered:
            uncovered_list = "\n".join(f"  path:{path}" for path in uncovered)
            # Accumulate so all offending toolchains are reported at once.
            scratch_pad.setdefault("errors", []).append(
                f"Toolchain '{task.label}' has resources not covered "
                f"by the taskgraph sparse profile.\n"
                f"Uncovered resources:\n{uncovered_list}"
            )
    else:
        errors = scratch_pad.get("errors", [])
        if errors:
            raise Exception(
                "Found toolchain resource(s) not covered by taskgraph sparse profile.\n"
                "This will cause incorrect hashes in the decision task.\n\n"
                + "\n\n".join(errors)
                + "\n\nTo fix, add the above path(s) to 'build/sparse-profiles/taskgraph'."
            )


@verifications.add("full_task_graph")
def verify_aliases(task, taskgraph, scratch_pad, graph_config, parameters):
    """
    This function verifies that aliases are not reused.
    """
    if task is None:
        return
    if task.kind not in ("toolchain", "fetch"):
        return
    # Per-kind bookkeeping: seen labels and alias -> defining-task-label map.
    for_kind = scratch_pad.setdefault(task.kind, {})
    aliases = for_kind.setdefault("aliases", {})
    alias_attribute = f"{task.kind}-alias"
    if task.label in aliases:
        raise Exception(
            f"Task `{aliases[task.label]}` has a {alias_attribute} of `{task.label[len(task.kind) + 1 :]}`, masking a task of that name."
        )
    labels = for_kind.setdefault("labels", set())
    labels.add(task.label)
    attributes = task.attributes
    if alias_attribute in attributes:
        # The alias attribute may be a single string or a list of strings.
        keys = attributes[alias_attribute]
        if not keys:
            keys = []
        elif isinstance(keys, str):
            keys = [keys]
        for key in keys:
            full_key = f"{task.kind}-{key}"
            if full_key in labels:
                raise Exception(
                    f"Task `{task.label}` has a {alias_attribute} of `{key}`,"
                    " masking a task of that name."
                )
            if full_key in aliases:
                raise Exception(
                    f"Duplicate {alias_attribute} in tasks `{task.label}`and `{aliases[full_key]}`: {key}"
                )
            else:
                aliases[full_key] = task.label


@verifications.add("optimized_task_graph")
def verify_always_optimized(task, taskgraph, scratch_pad, graph_config, parameters):
    """
    This function ensures that always-optimized tasks have been optimized.
    """
    if task is None:
        return
    if task.task.get("workerType") == "always-optimized":
        raise Exception(f"Could not optimize the task {task.label!r}")


@verifications.add("full_task_graph", run_on_projects=RELEASE_PROJECTS)
def verify_shippable_no_sccache(task, taskgraph, scratch_pad, graph_config, parameters):
    """Shippable (release-bound) tasks must build without sccache."""
    if task and task.attributes.get("shippable"):
        if task.task.get("payload", {}).get("env", {}).get("USE_SCCACHE"):
            raise Exception(f"Shippable job {task.label} cannot use sccache")


@verifications.add("full_task_graph")
def verify_test_packaging(task, taskgraph, scratch_pad, graph_config, parameters):
    """
    Verify MOZ_AUTOMATION_PACKAGE_TESTS is consistent: builds with dependent
    tests (or shippable builds) must package tests, and builds without
    dependent tests should not. Test tasks mark their build in scratch_pad;
    the final ``task is None`` call performs the check.
    """
    if task is None:
        # In certain cases there are valid reasons for tests to be missing,
        # don't error out when that happens.
        missing_tests_allowed = any(
            (
                # user specified `--target-kind`
                bool(parameters.get("target-kinds")),
                # manifest scheduling is enabled
                parameters["test_manifest_loader"] != "default",
            )
        )

        test_env = parameters["try_task_config"].get("env", {})
        if test_env.get("MOZHARNESS_TEST_PATHS", "") or test_env.get(
            "MOZHARNESS_TEST_TAG", ""
        ):
            # This is sort of a hack, as we are filtering, we might filter out all test jobs
            missing_tests_allowed = True

        exceptions = []
        for current_task in taskgraph.tasks.values():
            if current_task.kind == "build" and not current_task.attributes.get(
                "skip-verify-test-packaging"
            ):
                build_env = current_task.task.get("payload", {}).get("env", {})
                package_tests = build_env.get("MOZ_AUTOMATION_PACKAGE_TESTS")
                shippable = current_task.attributes.get("shippable", False)
                build_has_tests = scratch_pad.get(current_task.label)

                if package_tests != "1":
                    # Shippable builds should always package tests.
                    if shippable:
                        exceptions.append(
                            f"Build job {current_task.label} is shippable and does not specify "
                            "MOZ_AUTOMATION_PACKAGE_TESTS=1 in the "
                            "environment."
                        )

                    # Build tasks in the scratch pad have tests dependent on
                    # them, so we need to package tests during build.
                    if build_has_tests:
                        exceptions.append(
                            f"Build job {current_task.label} has tests dependent on it and does not specify "
                            "MOZ_AUTOMATION_PACKAGE_TESTS=1 in the environment"
                        )
                # Build tasks that aren't in the scratch pad have no
                # dependent tests, so we shouldn't package tests.
                # With the caveat that we expect shippable jobs to always
                # produce tests.
                elif not build_has_tests and not shippable:
                    # If we have not generated all task kinds, we can't verify that
                    # there are no dependent tests.
                    if not missing_tests_allowed:
                        exceptions.append(
                            f"Build job {current_task.label} has no tests, but specifies "
                            f"MOZ_AUTOMATION_PACKAGE_TESTS={package_tests} in the environment. "
                            "Unset MOZ_AUTOMATION_PACKAGE_TESTS in the task definition "
                            "to fix."
                        )
        if exceptions:
            raise Exception("\n".join(exceptions))
        return
    if task.kind in TEST_KINDS:
        build_task = taskgraph[task.dependencies["build"]]
        scratch_pad[build_task.label] = 1


@verifications.add("full_task_graph")
def verify_run_known_projects(task, taskgraph, scratch_pad, graph_config, parameters):
    """Validates the inputs in run-on-projects.

    We should never let 'try' (or 'try-comm-central') be in run-on-projects even though it
    is valid because it is not considered for try pushes. While here we also validate for
    other unknown projects or typos.
    """
    if task and task.attributes.get("run_on_projects"):
        projects = set(task.attributes["run_on_projects"])
        if {"try", "try-comm-central"} & projects:
            raise Exception(
                f"In task {task.label}: using try in run-on-projects is invalid; use try "
                "selectors to select this task on try"
            )
        # try isn't valid, but by the time we get here its not an available project anyway.
        valid_projects = ALL_PROJECTS | set(RUN_ON_PROJECT_ALIASES.keys())
        invalid_projects = projects - valid_projects
        if invalid_projects:
            raise Exception(
                f"Task '{task.label}' has an invalid run-on-projects value: "
                f"{invalid_projects}"
            )


@verifications.add("graph_config")
def verify_try_expiration_policies(graph_config):
    """We don't want any configuration leading to anything with an expiry longer
    than 28 days on try."""
    now = datetime.datetime.utcnow()
    cap = "28 days"
    cap_from_now = fromNow(cap, now)
    expiration_policy = evaluate_keyed_by(
        graph_config["expiration-policy"],
        "task expiration",
        {"project": "try", "level": "1"},
    )
    for policy, expires in expiration_policy.items():
        # Resolve both durations from the same base time so the comparison
        # is exact.
        if fromNow(expires, now) > cap_from_now:
            raise Exception(
                f'expiration-policy "{policy}" ({expires}) is larger than {cap} for try'
            )