run-task (38234B)
#!/usr/bin/python3 -u
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""Run a task after performing common actions.

This script is meant to be the "driver" for TaskCluster based tasks.
It receives some common arguments to control the run-time environment.

It performs actions as requested from the arguments. Then it executes
the requested process and prints its output, prefixing it with the
current time to improve log usefulness.
"""

import sys

# Guard before importing anything newer: give a readable error on
# ancient interpreters instead of a SyntaxError further down.
if sys.version_info[0:2] < (3, 5):
    print("run-task requires Python 3.5+")
    sys.exit(1)


import argparse
import datetime
import errno
import io
import json
import os
import random
import re
import shutil
import socket
import stat
import subprocess
import threading
import urllib.error
import urllib.request

# Taskcluster secret (reached through the taskcluster proxy) holding the
# current hg.mozilla.org TLS certificate fingerprints.
FINGERPRINT_URL = (
    "http://taskcluster/secrets/v1/secret/project/taskcluster/gecko/hgfingerprint"
)
# Used when the secret service is unreachable. NOTE(review): these pinned
# fingerprints go stale when the server certificate rotates — see the XXX
# comment at the point of use.
FALLBACK_FINGERPRINT = {
    "fingerprints": "sha256:4D:EB:21:6E:35:2F:99:C6:8F:C3:47:9B:57:B8:6C:17:15:8F:86:09:D4:6C:17:1D:87:B0:DE:F9:0E:51:70:FC,"
    "sha256:90:85:39:A8:4F:47:20:58:98:0D:48:4D:8A:AC:71:DB:5C:AF:76:44:F1:B1:3E:56:92:FF:21:8C:C9:A9:F7:11"
}

# Taskcluster secret describing regional hg.mozilla.org CI mirrors; see
# resolve_checkout_url for the expected shape.
HGMOINTERNAL_CONFIG_URL = (
    "http://taskcluster/secrets/v1/secret/project/taskcluster/gecko/hgmointernal"
)

CACHE_UID_GID_MISMATCH = """
There is a UID/GID mismatch on the cache. This likely means:

a) different tasks are running as a different user/group
b) different Docker images have different UID/GID for the same user/group

Our cache policy is that the UID/GID for ALL tasks must be consistent
for the lifetime of the cache. This eliminates permissions problems due
to file/directory user/group ownership.

To make this error go away, ensure that all Docker images are use
a consistent UID/GID and that all tasks using this cache are running as
the same user/group.
"""


NON_EMPTY_VOLUME = """
error: volume %s is not empty

Our Docker image policy requires volumes to be empty.

The volume was likely populated as part of building the Docker image.
Change the Dockerfile and anything run from it to not create files in
any VOLUME.

A lesser possibility is that you stumbled upon a TaskCluster platform bug
where it fails to use new volumes for tasks.
"""


FETCH_CONTENT_NOT_FOUND = """
error: fetch-content script not found

The `fetch-content` script could not be detected in the current environment.

If this task clones gecko, make sure the GECKO_PATH environment variable
is set to proper location. Otherwise, the script may need to be mounted
or added to the task's docker image then added to the PATH.
"""

# The exit code to use when caches should be purged and the task retried.
# This is EX_OSFILE (from sysexits.h):
#     Some system file does not exist, cannot be opened, or has some
#     sort of error (e.g., syntax error).
EXIT_PURGE_CACHE = 72


IS_MACOSX = sys.platform == "darwin"
IS_POSIX = os.name == "posix"
IS_WINDOWS = os.name == "nt"


def print_line(prefix, m):
    """Write bytes message `m` to stdout, prefixed with `prefix` and a
    millisecond-precision UTC timestamp."""
    now = (
        datetime.datetime.now(datetime.timezone.utc)
        .isoformat(timespec="milliseconds")
        .encode("utf-8")
    )
    # `buf` (renamed from `bytes`, which shadowed the builtin). Loop because
    # buffer.write() may perform a partial write.
    buf = b"[%s %s] %s" % (prefix, now, m)
    written = 0
    while written < len(buf):
        written += sys.stdout.buffer.write(buf[written:]) or 0
    sys.stdout.buffer.flush()


def reap_zombies(main_subprocess):
    """Wait for main_subprocess to exit, while awaiting any other child processes"""
    while main_subprocess.poll() is None:
        # Hold the Popen-internal lock so os.wait() doesn't race with a
        # concurrent poll()/wait() reaping the same child.
        with main_subprocess._waitpid_lock:
            if main_subprocess.returncode is not None:
                return
            pid, status = os.wait()
            if pid == main_subprocess.pid:
                main_subprocess._handle_exitstatus(status)


def run_and_prefix_output(prefix, args, *, extra_env=None, cwd=None):
    """Runs a process and prefixes its output with the time.

    Returns the process exit code.
    """
    print_line(
        prefix,
        # Note: a space was missing before "in", producing "…]in /path".
        b"executing %r%s\n"
        % (args, b" in %s" % (cwd.encode("utf-8"),) if cwd else b""),
    )

    env = dict(os.environ)
    env.update(extra_env or {})

    # Note: TaskCluster's stdin is a TTY. This attribute is lost
    # when we pass sys.stdin to the invoked process. If we cared
    # to preserve stdin as a TTY, we could make this work. But until
    # someone needs it, don't bother.

    # We want stdout to be bytes on Python 3. That means we can't use
    # universal_newlines=True (because it implies text mode). But
    # p.stdout.readline() won't work for bytes text streams. So, on Python 3,
    # we manually install a latin1 stream wrapper. This allows us to readline()
    # and preserves bytes, without losing any data.

    p = subprocess.Popen(
        args,
        # Disable buffering because we want to receive output
        # as it is generated so timestamps in logs are
        # accurate.
        bufsize=0,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        stdin=sys.stdin.fileno(),
        env=env,
        cwd=cwd,
    )

    stdout = io.TextIOWrapper(p.stdout, encoding="latin1")

    if os.getpid() == 1:
        # in docker we're init, so we get to adopt unawaited zombies
        reaper_thread = threading.Thread(target=reap_zombies, args=(p,))
        reaper_thread.start()
    else:
        reaper_thread = None

    while True:
        data = stdout.readline().encode("latin1")

        if data == b"":
            break

        print_line(prefix, data)

    if reaper_thread:
        reaper_thread.join()
    return p.wait()


def get_posix_user_group(user, group):
    """Resolve `user`/`group` names to their pwd/grp records.

    Returns (pwd record, grp record, list of supplementary gids).
    Exits the process with 1 if either name is unknown or the `worker`
    user/group doesn't have the required numeric id.
    """
    import grp
    import pwd

    try:
        user_record = pwd.getpwnam(user)
    except KeyError:
        print("could not find user %s; specify a valid user with --user" % user)
        sys.exit(1)

    try:
        group_record = grp.getgrnam(group)
    except KeyError:
        print("could not find group %s; specify a valid group with --group" % group)
        sys.exit(1)

    # Most tasks use worker:worker. We require they have a specific numeric ID
    # because otherwise it is too easy for files written to caches to have
    # mismatched numeric IDs, which results in permissions errors.
    if user_record.pw_name == "worker" and user_record.pw_uid != 1000:
        print("user `worker` must have uid=1000; got %d" % user_record.pw_uid)
        sys.exit(1)

    if group_record.gr_name == "worker" and group_record.gr_gid != 1000:
        print("group `worker` must have gid=1000; got %d" % group_record.gr_gid)
        sys.exit(1)

    # Find all groups to which this user is a member.
    gids = [g.gr_gid for g in grp.getgrall() if group in g.gr_mem]

    return user_record, group_record, gids


def write_audit_entry(path, msg):
    """Append a timestamped, TASK_ID-attributed entry to the audit log at `path`."""
    # datetime.utcnow() is deprecated (Python 3.12); build the identical naive
    # ISO-8601 string (the trailing "Z" is added by the format below) from an
    # aware UTC datetime instead.
    now = (
        datetime.datetime.now(datetime.timezone.utc)
        .replace(tzinfo=None)
        .isoformat()
        .encode("utf-8")
    )
    with open(path, "ab") as fh:
        fh.write(b"[%sZ %s] %s\n" % (now, os.environb.get(b"TASK_ID", b"UNKNOWN"), msg))


WANTED_DIR_MODE = stat.S_IXUSR | stat.S_IRUSR | stat.S_IWUSR


def set_dir_permissions(path, uid, gid):
    """Ensure `path` is owned by uid:gid and is rwx for its owner."""
    st = os.lstat(path)

    if st.st_uid != uid or st.st_gid != gid:
        os.chown(path, uid, gid)

    # Also make sure dirs are writable in case we need to delete
    # them.
    if st.st_mode & WANTED_DIR_MODE != WANTED_DIR_MODE:
        os.chmod(path, st.st_mode | WANTED_DIR_MODE)


def chown_recursive(path, user, group, uid, gid):
    """Recursively chown `path` (and fix directory modes) to uid:gid.

    `user`/`group` are only used for the log message.
    """
    print_line(
        b"chown",
        b"recursively changing ownership of %s to %s:%s\n"
        % (path.encode("utf-8"), user.encode("utf-8"), group.encode("utf-8")),
    )

    set_dir_permissions(path, uid, gid)

    for root, dirs, files in os.walk(path):
        for d in dirs:
            set_dir_permissions(os.path.join(root, d), uid, gid)

        for f in files:
            # File may be a symlink that points to nowhere. In which case
            # os.chown() would fail because it attempts to follow the
            # symlink. We only care about directory entries, not what
            # they point to. So setting the owner of the symlink should
            # be sufficient.
            os.lchown(os.path.join(root, f), uid, gid)


def configure_cache_posix(cache, user, group, untrusted_caches, running_as_root):
    """Configure a cache path on POSIX platforms.

    For each cache, we write out a special file denoting attributes and
    capabilities of run-task and the task being executed. These attributes
    are used by subsequent run-task invocations to validate that use of
    the cache is acceptable.

    We /could/ blow away the cache data on requirements mismatch.
    While this would be convenient, this could result in "competing" tasks
    effectively undoing the other's work. This would slow down task
    execution in aggregate. Without monitoring for this, people may not notice
    the problem and tasks would be slower than they could be. We follow the
    principle of "fail fast" to ensure optimal task execution.

    We also write an audit log of who used the caches. This log is printed
    during failures to help aid debugging.

    Returns True when the cache must be purged (and the task retried);
    a falsy return means the cache is usable as-is.
    """

    our_requirements = {
        # Include a version string that we can bump whenever to trigger
        # fresh caches. The actual value is not relevant and doesn't need
        # to follow any explicit order. Since taskgraph bakes this file's
        # hash into cache names, any change to this file/version is sufficient
        # to force the use of a new cache.
        b"version=1",
        # Include the UID and GID the task will run as to ensure that tasks
        # with different UID and GID don't share the same cache.
        b"uid=%d" % user.pw_uid,
        b"gid=%d" % group.gr_gid,
    }

    requires_path = os.path.join(cache, ".cacherequires")
    audit_path = os.path.join(cache, ".cachelog")

    # The cache is empty. Configure it.
    if not os.listdir(cache):
        print_line(
            b"cache",
            b"cache %s is empty; writing requirements: "
            b"%s\n" % (cache.encode("utf-8"), b" ".join(sorted(our_requirements))),
        )

        # We write a requirements file so future invocations know what the
        # requirements are.
        with open(requires_path, "wb") as fh:
            fh.write(b"\n".join(sorted(our_requirements)))

        # And make it read-only as a precaution against deletion.
        os.chmod(requires_path, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)

        write_audit_entry(
            audit_path,
            b"created; requirements: %s" % b", ".join(sorted(our_requirements)),
        )

        set_dir_permissions(cache, user.pw_uid, group.gr_gid)
        return

    # The cache has content and we have a requirements file. Validate
    # requirements alignment.
    if os.path.exists(requires_path):
        with open(requires_path, "rb") as fh:
            wanted_requirements = set(fh.read().splitlines())

        print_line(
            b"cache",
            b"cache %s exists; requirements: %s\n"
            % (cache.encode("utf-8"), b" ".join(sorted(wanted_requirements))),
        )

        missing = wanted_requirements - our_requirements

        # Allow requirements mismatch for uid/gid if and only if caches
        # are untrusted. This allows cache behavior on Try to be
        # reasonable. Otherwise, random tasks could "poison" cache
        # usability by introducing uid/gid mismatches. For untrusted
        # environments like Try, this is a perfectly reasonable thing to
        # allow.
        if (
            missing
            and untrusted_caches
            and running_as_root
            and all(s.startswith((b"uid=", b"gid=")) for s in missing)
        ):
            print_line(
                b"cache",
                b"cache %s uid/gid mismatch; this is acceptable "
                b"because caches for this task are untrusted; "
                b"changing ownership to facilitate cache use\n" % cache.encode("utf-8"),
            )
            chown_recursive(
                cache, user.pw_name, group.gr_name, user.pw_uid, group.gr_gid
            )

            # And write out the updated reality.
            with open(requires_path, "wb") as fh:
                fh.write(b"\n".join(sorted(our_requirements)))

            write_audit_entry(
                audit_path,
                b"chown; requirements: %s" % b", ".join(sorted(our_requirements)),
            )

        elif missing:
            print(
                "error: requirements for populated cache %s differ from "
                "this task" % cache
            )
            print(
                "cache requirements: %s"
                % " ".join(sorted(s.decode("utf-8") for s in wanted_requirements))
            )
            print(
                "our requirements: %s"
                % " ".join(sorted(s.decode("utf-8") for s in our_requirements))
            )
            if any(s.startswith((b"uid=", b"gid=")) for s in missing):
                print(CACHE_UID_GID_MISMATCH)

            write_audit_entry(
                audit_path,
                b"requirements mismatch; wanted: %s"
                % b", ".join(sorted(our_requirements)),
            )

            print("")
            print("audit log:")
            with open(audit_path) as fh:
                print(fh.read())

            return True
        else:
            write_audit_entry(audit_path, b"used")

        # We don't need to adjust permissions here because the cache is
        # associated with a uid/gid and the first task should have set
        # a proper owner/group.

        return

    # The cache has content and no requirements file. This shouldn't
    # happen because run-task should be the first thing that touches a
    # cache.
    print(
        "error: cache %s is not empty and is missing a "
        ".cacherequires file; the cache names for this task are "
        "likely mis-configured or TASKCLUSTER_CACHES is not set "
        "properly" % cache
    )

    write_audit_entry(audit_path, b"missing .cacherequires")
    return True


def configure_volume_posix(volume, user, group, running_as_root):
    """Validate a Docker volume is empty and (when root) chown it to the task user."""
    # The only time we should see files in the volume is if the Docker
    # image build put files there.
    #
    # For the sake of simplicity, our policy is that volumes should be
    # empty. This also has the advantage that an empty volume looks
    # a lot like an empty cache. Tasks can rely on caches being
    # swapped in and out on any volume without any noticeable change
    # of behavior.
    volume_files = os.listdir(volume)
    if volume_files:
        print(NON_EMPTY_VOLUME % volume)
        print("entries in root directory: %s" % " ".join(sorted(volume_files)))
        sys.exit(1)

    # The volume is almost certainly owned by root:root. Chown it so it
    # is writable.

    if running_as_root:
        print_line(
            b"volume",
            b"changing ownership of volume %s "
            b"to %d:%d\n" % (volume.encode("utf-8"), user.pw_uid, group.gr_gid),
        )
        set_dir_permissions(volume, user.pw_uid, group.gr_gid)


def vcs_checkout(
    source_repo,
    dest,
    store_path,
    base_repo=None,
    revision=None,
    branch=None,
    fetch_hgfingerprint=False,
    sparse_profile=None,
):
    """Check out `source_repo` into `dest` via `hg robustcheckout`.

    Returns the 40-hex-digit revision that was checked out. Exits the
    process on checkout failure (after removing the incomplete clone).
    """
    # Specify method to checkout a revision. This defaults to revisions as
    # SHA-1 strings, but also supports symbolic revisions like `tip` via the
    # branch flag.
    if revision:
        revision_flag = "--revision"
        revision_value = revision
    elif branch:
        revision_flag = "--branch"
        revision_value = branch
    else:
        print("revision is not specified for checkout")
        sys.exit(1)

    if IS_MACOSX or IS_POSIX:
        hg_bin = "hg"
    elif IS_WINDOWS:
        # This is where OCC installs it in the AMIs.
        hg_bin = r"C:\Program Files\Mercurial\hg.exe"
        if not os.path.exists(hg_bin):
            print("could not find Mercurial executable: %s" % hg_bin)
            sys.exit(1)

    store_path = os.path.abspath(store_path)
    args = [
        hg_bin,
        "robustcheckout",
        "--sharebase",
        store_path,
        "--purge",
    ]

    # Prefer a robustcheckout extension shipped next to this script, if any.
    robustcheckout = os.path.join(os.path.dirname(__file__), "robustcheckout.py")
    if os.path.isfile(robustcheckout):
        args.extend(
            [
                "--config",
                f"extensions.robustcheckout={robustcheckout}",
            ]
        )

    # Obtain certificate fingerprints. Without this, the checkout will use the fingerprint
    # on the system, which is managed some other way (such as puppet)
    if fetch_hgfingerprint:
        try:
            print_line(
                b"vcs",
                b"fetching hg.mozilla.org fingerprint from %s\n"
                % FINGERPRINT_URL.encode("utf-8"),
            )
            res = urllib.request.urlopen(FINGERPRINT_URL, timeout=10)
            secret = res.read()
            try:
                secret = json.loads(secret.decode("utf-8"))
            except ValueError:
                print_line(b"vcs", b"invalid JSON in hg fingerprint secret")
                sys.exit(1)
        except (urllib.error.URLError, socket.timeout):
            print_line(
                b"vcs",
                # Note: a space was missing between "fingerprint" and "using".
                b"Unable to retrieve current hg.mozilla.org fingerprint "
                b"using the secret service, using fallback instead.",
            )
            # XXX This fingerprint will not be accurate if running on an old
            # revision after the server fingerprint has changed.
            secret = {"secret": FALLBACK_FINGERPRINT}

        hgmo_fingerprint = secret["secret"]["fingerprints"]
        args.extend(
            [
                "--config",
                "hostsecurity.hg.mozilla.org:fingerprints=%s" % hgmo_fingerprint,
            ]
        )

    if base_repo:
        args.extend(["--upstream", base_repo])
    if sparse_profile:
        args.extend(["--sparseprofile", sparse_profile])

    dest = os.path.abspath(dest)
    args.extend(
        [
            revision_flag,
            revision_value,
            source_repo,
            dest,
        ]
    )

    res = run_and_prefix_output(b"vcs", args, extra_env={"PYTHONUNBUFFERED": "1"})
    if res:
        # Mitigation for bug 1539681: if for some reason the clone failed,
        # we just remove it, so that its possible incomplete state doesn't
        # interfere with cloning in subsequent tasks.
        shutil.rmtree(dest, ignore_errors=True)
        sys.exit(res)

    # Update the current revision hash and ensure that it is well formed.
    revision = subprocess.check_output(
        [hg_bin, "log", "--rev", ".", "--template", "{node}"],
        cwd=dest,
        # Triggers text mode on Python 3.
        universal_newlines=True,
    )

    assert re.match("^[a-f0-9]{40}$", revision)

    msg = (
        "TinderboxPrint:<a href={source_repo}/rev/{revision} "
        "title='Built from {repo_name} revision {revision}'>"
        "{revision}</a>\n".format(
            revision=revision,
            source_repo=source_repo,
            repo_name=source_repo.split("/")[-1],
        )
    )

    print_line(b"vcs", msg.encode("utf-8"))

    return revision


def fetch_artifacts():
    """Locate the `fetch-content` script and run its `task-artifacts` command.

    Exits the process if the script cannot be found or the fetch fails.
    """
    print_line(b"fetches", b"fetching artifacts\n")

    fetch_content = shutil.which("fetch-content")
    if not fetch_content and os.environ.get("GECKO_PATH"):
        fetch_content = os.path.join(
            os.environ["GECKO_PATH"],
            "third_party",
            "python",
            "taskcluster_taskgraph",
            "taskgraph",
            "run-task",
            "fetch-content",
        )

    if not fetch_content or not os.path.isfile(fetch_content):
        fetch_content = os.path.join(os.path.dirname(__file__), "fetch-content")

    if not os.path.isfile(fetch_content):
        print(FETCH_CONTENT_NOT_FOUND)
        sys.exit(1)

    cmd = [sys.executable, "-u", fetch_content, "task-artifacts"]
    res = run_and_prefix_output(b"fetches", cmd)
    if res:
        sys.exit(res)

    print_line(b"fetches", b"finished fetching artifacts\n")


def add_vcs_arguments(parser, project, name):
    """Adds arguments to ArgumentParser to control VCS options for a project."""

    parser.add_argument(
        "--%s-checkout" % project,
        help="Directory where %s checkout should be created" % name,
    )
    parser.add_argument(
        "--%s-sparse-profile" % project,
        help="Path to sparse profile for %s checkout" % name,
    )


def resolve_checkout_url(base_repo, head_repo):
    """Resolve the Mercurial URL to perform a checkout against, either the
    public hg.mozilla.org service or a CI-only regional mirror.

    The config will be of the form:
        {
            "aws/us-west-2": {  # key built from `TASKCLUSTER_WORKER_LOCATION` variable
                "rate": 0.5,
                "domain": "us-west-2.hgmointernal.net"
            },
            "google/us-central1": {...}
        }
    """
    worker_location = os.getenv("TASKCLUSTER_WORKER_LOCATION")
    if not worker_location:
        print_line(
            b"vcs",
            b"TASKCLUSTER_WORKER_LOCATION environment variable not set; "
            b"using public hg.mozilla.org service\n",
        )
        return base_repo, head_repo

    try:
        worker_location = json.loads(worker_location)
    except json.JSONDecodeError:
        print_line(
            b"vcs",
            b"Could not decode TASKCLUSTER_WORKER_LOCATION environment variable "
            b"as JSON. Content: %s\n" % worker_location.encode("utf-8"),
        )
        print_line(b"vcs", b"using public hg.mozilla.org service\n")
        return base_repo, head_repo

    if "cloud" not in worker_location or "region" not in worker_location:
        print_line(
            b"vcs",
            b"TASKCLUSTER_WORKER_LOCATION missing required keys; "
            b"using public hg.mozilla.org service\n",
        )
        return base_repo, head_repo

    config_key = "%(cloud)s/%(region)s" % worker_location

    try:
        print_line(
            b"vcs",
            b"fetching hgmointernal config from %s\n"
            % HGMOINTERNAL_CONFIG_URL.encode("utf-8"),
        )

        # Get the hgmointernal config Taskcluster secret
        res = urllib.request.urlopen(HGMOINTERNAL_CONFIG_URL, timeout=10)
        hgmointernal_config = json.loads(res.read().decode("utf-8"))["secret"]

        # Use public hg service if region not yet supported
        if config_key not in hgmointernal_config:
            print_line(
                b"vcs",
                b"region %s not yet supported; using public "
                b"hg.mozilla.org service\n" % config_key.encode("utf-8"),
            )

            return base_repo, head_repo

        # Only send a percentage of traffic to the internal mirror
        rate = float(hgmointernal_config[config_key]["rate"])

        if random.random() > rate:
            print_line(
                b"vcs",
                b"hgmointernal rate miss; using " b"public hg.mozilla.org service\n",
            )
            return base_repo, head_repo

        print_line(
            b"vcs", b"hgmointernal rate hit; cloning from " b"private hgweb mirror\n"
        )

        mirror_domain = hgmointernal_config[config_key]["domain"]

        if base_repo and base_repo.startswith("https://hg.mozilla.org"):
            base_repo = base_repo.replace("hg.mozilla.org", mirror_domain, 1)

        if head_repo and head_repo.startswith("https://hg.mozilla.org"):
            head_repo = head_repo.replace("hg.mozilla.org", mirror_domain, 1)

        return base_repo, head_repo

    except (KeyError, ValueError):
        print_line(
            b"vcs",
            b"invalid JSON in hgmointernal config; "
            b"falling back to public hg.mozilla.org service\n",
        )

    except (urllib.error.URLError, socket.timeout):
        print_line(
            b"vcs",
            b"Unable to retrieve hgmointernal config using "
            b"the secret service; falling back to public hg.mozilla.org "
            b"service\n",
        )

    return base_repo, head_repo


def collect_vcs_options(args, project):
    """Gather checkout configuration for `project` from args and environment.

    Returns a dict describing the store path, repositories, revision/branch
    and sparse profile to use for the checkout.
    """
    checkout = getattr(args, "%s_checkout" % project)
    sparse_profile = getattr(args, "%s_sparse_profile" % project)

    env_prefix = project.upper()

    base_repo = os.environ.get("%s_BASE_REPOSITORY" % env_prefix)
    head_repo = os.environ.get("%s_HEAD_REPOSITORY" % env_prefix)
    revision = os.environ.get("%s_HEAD_REV" % env_prefix)
    branch = os.environ.get("%s_HEAD_REF" % env_prefix)

    store_path = os.environ.get("HG_STORE_PATH")

    # Expand ~ in some paths.
    if checkout:
        checkout = os.path.expanduser(checkout)
    if store_path:
        store_path = os.path.expanduser(store_path)

    # Some callers set the base repository to mozilla-central for historical
    # reasons. Switch to mozilla-unified because robustcheckout works best
    # with it.
    if base_repo == "https://hg.mozilla.org/mozilla-central":
        base_repo = "https://hg.mozilla.org/mozilla-unified"

    # No need to check the hgmointernal config if we aren't performing
    # a checkout.
    if checkout:
        base_repo, head_repo = resolve_checkout_url(base_repo, head_repo)

    return {
        "store-path": store_path,
        "project": project,
        "env-prefix": env_prefix,
        "checkout": checkout,
        "sparse-profile": sparse_profile,
        "base-repo": base_repo,
        "head-repo": head_repo,
        "revision": revision,
        "branch": branch,
    }


def vcs_checkout_from_args(args, project):
    """Perform the checkout described by `args` for `project`, if requested.

    On success, exports the resolved revision as <PREFIX>_HEAD_REV for
    downstream consumers.
    """
    options = collect_vcs_options(args, project)

    if not options["checkout"]:
        if options["branch"] and not options["revision"]:
            print("task should be defined in terms of non-symbolic revision")
            sys.exit(1)
        return

    os.environ["%s_HEAD_REV" % options["env-prefix"]] = vcs_checkout(
        options["head-repo"],
        options["checkout"],
        options["store-path"],
        base_repo=options["base-repo"],
        revision=options["revision"],
        fetch_hgfingerprint=args.fetch_hgfingerprint,
        branch=options["branch"],
        sparse_profile=options["sparse-profile"],
    )


def main(args):
    """Set up the task environment, perform checkouts/fetches, drop
    privileges, and run the task command. Returns the task's exit code."""
    task_workdir = os.getcwd()

    # Task definitions generally expect to use forward slashes, so normalize to
    # avoid paths with mixed separators.
    if os.sep != "/":
        task_workdir = task_workdir.replace(os.sep, "/")

    os.environ["TASK_WORKDIR"] = task_workdir
    print_line(b"setup", b"run-task started in %s\n" % task_workdir.encode("utf-8"))
    running_as_root = IS_POSIX and os.getuid() == 0

    # Set a reasonable limit to the number of open files.
    # Running under docker inherits the system defaults, which are not subject
    # to the "standard" limits set by pam_limits.so, and while they work well
    # for servers that may receive a lot of connections, they cause performance
    # problems for things that close file descriptors before forking (for good
    # reasons), like python's `subprocess.Popen(..., close_fds=True)` (and while
    # the default was close_fds=False in python2, that changed in python3).
    # In some cases, Firefox does the same thing when spawning subprocesses.
    # Processes spawned by this one will inherit the limit set here.
    try:
        import resource

        # Keep the hard limit the same, though, allowing processes to change their
        # soft limit if they need to (Firefox does, for instance).
        (soft, hard) = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = os.environ.get("MOZ_LIMIT_NOFILE")
        if limit:
            limit = int(limit)
        else:
            # If no explicit limit is given, use 1024 if it's less than the current
            # soft limit. For instance, the default on macOS is 256, so we'd pick
            # that rather than 1024.
            limit = min(soft, 1024)
        # Now apply the limit, if it's different from the original one.
        if limit != soft:
            resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))
    except ImportError:
        # The resource module is UNIX only.
        pass

    # Arguments up to '--' are ours. After are for the main task
    # to be executed.
    try:
        i = args.index("--")
        our_args = args[0:i]
        task_args = args[i + 1 :]
    except ValueError:
        our_args = args
        task_args = []

    parser = argparse.ArgumentParser()
    parser.add_argument("--user", default="worker", help="user to run as")
    parser.add_argument("--group", default="worker", help="group to run as")
    parser.add_argument("--task-cwd", help="directory to run the provided command in")

    add_vcs_arguments(parser, "gecko", "Firefox")
    add_vcs_arguments(parser, "comm", "Comm")

    parser.add_argument(
        "--fetch-hgfingerprint",
        action="store_true",
        # Typo fix: was "taskclsuerProxy".
        help="Fetch the latest hgfingerprint from the secrets store, "
        "using the taskclusterProxy",
    )

    args = parser.parse_args(our_args)

    uid = gid = gids = None
    if IS_POSIX and running_as_root:
        user, group, gids = get_posix_user_group(args.user, args.group)
        uid = user.pw_uid
        gid = group.gr_gid

    if running_as_root and os.path.exists("/dev/kvm"):
        # Ensure kvm permissions for worker, required for Android x86
        st = os.stat("/dev/kvm")
        os.chmod("/dev/kvm", st.st_mode | 0o666)

    # Validate caches.
    #
    # Taskgraph should pass in a list of paths that are caches via an
    # environment variable (which we don't want to pass down to child
    # processes).

    if "TASKCLUSTER_CACHES" in os.environ:
        caches = os.environ["TASKCLUSTER_CACHES"].split(";")
        del os.environ["TASKCLUSTER_CACHES"]
    else:
        caches = []

    if "TASKCLUSTER_UNTRUSTED_CACHES" in os.environ:
        untrusted_caches = True
        del os.environ["TASKCLUSTER_UNTRUSTED_CACHES"]
    else:
        untrusted_caches = False

    for cache in caches:
        if not os.path.isdir(cache):
            print(
                "error: cache %s is not a directory; this should never "
                "happen" % cache
            )
            return 1

        if running_as_root:
            purge = configure_cache_posix(
                cache, user, group, untrusted_caches, running_as_root
            )

            if purge:
                return EXIT_PURGE_CACHE

    if "TASKCLUSTER_VOLUMES" in os.environ:
        volumes = os.environ["TASKCLUSTER_VOLUMES"].split(";")
        del os.environ["TASKCLUSTER_VOLUMES"]
    else:
        volumes = []

    if volumes and not IS_POSIX:
        print("assertion failed: volumes not expected on Windows")
        return 1

    # Sanitize volumes.
    for volume in volumes:
        # If a volume is a cache, it was dealt with above.
        if volume in caches:
            print_line(b"volume", b"volume %s is a cache\n" % volume.encode("utf-8"))
            continue

        if running_as_root:
            configure_volume_posix(volume, user, group, running_as_root)

    all_caches_and_volumes = set(map(os.path.normpath, caches))
    all_caches_and_volumes |= set(map(os.path.normpath, volumes))

    def path_in_cache_or_volume(path):
        # Walk up the path's ancestors looking for a cache/volume root.
        path = os.path.normpath(path)

        while path:
            if path in all_caches_and_volumes:
                return True

            path, child = os.path.split(path)
            if not child:
                break

        return False

    def prepare_checkout_dir(checkout):
        # Validate and create the parent directory of a vcs checkout.
        if not checkout:
            return

        # The checkout path becomes the working directory. Since there are
        # special cache files in the cache's root directory and working
        # directory purging could blow them away, disallow this scenario.
        if os.path.exists(os.path.join(checkout, ".cacherequires")):
            print("error: cannot perform vcs checkout into cache root: %s" % checkout)
            sys.exit(1)

        # TODO given the performance implications, consider making this a fatal
        # error.
        if not path_in_cache_or_volume(checkout):
            print_line(
                b"vcs",
                b"WARNING: vcs checkout path (%s) not in cache "
                b"or volume; performance will likely suffer\n"
                % checkout.encode("utf-8"),
            )

        # Ensure the directory for the source checkout exists.
        try:
            os.makedirs(os.path.dirname(checkout))
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        # And that it is owned by the appropriate user/group.
        if running_as_root:
            os.chown(os.path.dirname(checkout), uid, gid)

    def prepare_hg_store_path():
        # And ensure the shared store path exists and has proper permissions.
        if "HG_STORE_PATH" not in os.environ:
            print("error: HG_STORE_PATH environment variable not set")
            sys.exit(1)

        store_path = os.environ["HG_STORE_PATH"]

        if not path_in_cache_or_volume(store_path):
            print_line(
                b"vcs",
                b"WARNING: HG_STORE_PATH (%s) not in cache or "
                b"volume; performance will likely suffer\n"
                % store_path.encode("utf-8"),
            )

        try:
            os.makedirs(store_path)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        if running_as_root:
            os.chown(store_path, uid, gid)

    prepare_checkout_dir(args.gecko_checkout)
    if args.gecko_checkout or args.comm_checkout:
        prepare_hg_store_path()

    # Bug 1930944: work around docker-worker not running the image's entry point for interactive tasks.
    # Tests need a system bus, which needs to be started before dropping privileges, so do this here until we're off
    # docker-worker; see
    # https://searchfox.org/mozilla-central/rev/8c9c85c74e366c11ffacbb5a2e457b33b0acc9cd/taskcluster/docker/ubuntu1804-test/Dockerfile#79-83
    if (
        running_as_root
        and "TASKCLUSTER_INTERACTIVE" in os.environ
        and os.access("/etc/init.d/dbus", os.X_OK)
    ):
        subprocess.run(["/etc/init.d/dbus", "start"])

    if IS_POSIX and running_as_root:
        # Drop permissions to requested user.
        # This code is modeled after what `sudo` was observed to do in a Docker
        # container. We do not bother calling setrlimit() because containers have
        # their own limits.
        print_line(
            b"setup",
            b"running as %s:%s\n"
            % (args.user.encode("utf-8"), args.group.encode("utf-8")),
        )

        os.setgroups(gids)
        os.umask(0o22)
        os.setresgid(gid, gid, gid)
        os.setresuid(uid, uid, uid)

    vcs_checkout_from_args(args, "gecko")
    vcs_checkout_from_args(args, "comm")

    for k in (
        "CARGO_HOME",
        "GECKO_PATH",
        "MOZ_FETCHES_DIR",
        "MOZ_PYTHON_HOME",
        "PIP_CACHE_DIR",
        "UPLOAD_DIR",
        "UV_CACHE_DIR",
        "npm_config_cache",
        "MOZ_UV_HOME",
    ):
        if k in os.environ:
            # Normalize paths to use forward slashes. Some shell scripts
            # tolerate that better on Windows.
            os.environ[k] = os.path.abspath(os.environ[k]).replace(os.sep, "/")
            print_line(
                b"setup",
                b"%s is %s\n" % (k.encode("utf-8"), os.environ[k].encode("utf-8")),
            )

    if "MOZ_FETCHES" in os.environ:
        fetch_artifacts()

    # If Python is a fetch dependency, add it to the PATH and setting
    # the mozilla-specific MOZ_PYTHON_HOME to relocate binaries.
    if "MOZ_PYTHON_HOME" in os.environ:

        print_line(b"setup", b"Setting up local python environment\n")
        prev = [os.environ["PATH"]] if "PATH" in os.environ else []

        moz_python_home = os.environ["MOZ_PYTHON_HOME"]
        if IS_WINDOWS:
            ext = ".exe"
            moz_python_bindir = moz_python_home
        else:
            ext = ""
            moz_python_bindir = moz_python_home + "/bin"

        new = os.environ["PATH"] = os.pathsep.join([moz_python_bindir] + prev)

        # Relocate the python binary. Standard way uses PYTHONHOME, but
        # this conflicts with system python (e.g. used by hg) so we
        # maintain a small patch to use MOZPYTHONHOME instead.
        os.environ["MOZPYTHONHOME"] = moz_python_home

        pyinterp = os.path.join(moz_python_bindir, f"python3{ext}")
        # just a sanity check
        if not os.path.exists(pyinterp):
            raise RuntimeError(
                "Inconsistent Python installation: "
                "archive found, but no python3 binary "
                "detected"
            )

        if IS_MACOSX:
            # On OSX, we may not have access to the system certificate,
            # so use the certifi ones.
            certifi_cert_file = subprocess.check_output(
                [pyinterp, "-c", "import certifi; print(certifi.where())"],
                text=True,
            )
            os.environ["SSL_CERT_FILE"] = certifi_cert_file.strip()
            print_line(b"setup", b"patching ssl certificate\n")

        print_line(
            b"setup", b"updated PATH with python artifact: " + new.encode() + b"\n"
        )

    if "MOZ_UV_HOME" in os.environ:
        print_line(b"setup", b"Adding uv to PATH\n")
        current_env = [os.environ["PATH"]] if "PATH" in os.environ else []
        moz_python_bindir = os.environ["MOZ_UV_HOME"]
        new = os.environ["PATH"] = os.pathsep.join([moz_python_bindir] + current_env)
        print_line(
            b"setup", b"updated PATH with uv artifact: " + new.encode() + b"\n"
        )

    return run_and_prefix_output(b"task", task_args, cwd=args.task_cwd)


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))