fetch.py (14852B)
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Support for running tasks that download remote content and re-export
# it as task artifacts.


import os
import re

import attr
import taskgraph
from mozpack import path as mozpath
from mozshellutil import quote as shell_quote
from taskgraph.transforms.base import TransformSequence
from taskgraph.util.schema import Schema, validate_schema
from taskgraph.util.treeherder import join_symbol
from voluptuous import Any, Extra, Optional, Required

import gecko_taskgraph
from gecko_taskgraph.transforms.task import task_description_schema

from ..util.cached_tasks import add_optimization

CACHE_TYPE = "content.v1"

FETCH_SCHEMA = Schema(
    {
        # Name of the task.
        Required("name"): str,
        # Relative path (from config.path) to the file the task was defined
        # in.
        Optional("task-from"): str,
        # Description of the task.
        Required("description"): str,
        Optional(
            "fetch-alias",
            description="An alias that can be used instead of the real fetch job name in "
            "fetch stanzas for jobs.",
        ): str,
        Optional(
            "artifact-prefix",
            description="The prefix of the taskcluster artifact being uploaded. "
            "Defaults to `public/`; if it starts with something other than "
            "`public/` the artifact will require scopes to access.",
        ): str,
        Optional("attributes"): {str: object},
        Optional("run-on-repo-type"): task_description_schema["run-on-repo-type"],
        Required("fetch"): {
            Required("type"): str,
            Extra: object,
        },
    }
)


# define a collection of payload builders, depending on the worker implementation
fetch_builders = {}


@attr.s(frozen=True)
class FetchBuilder:
    # Schema used to validate the `fetch` stanza for this fetch type.
    schema = attr.ib(type=Schema)
    # Callable (config, name, fetch) -> partial job configuration dict
    # (command, artifact_name, digest_data, ...).
    builder = attr.ib()


def fetch_builder(name, schema):
    """Decorator registering a builder for fetch type ``name``.

    ``schema`` is extended with the required ``type`` key and used to
    validate the ``fetch`` stanza before the builder runs.
    """
    schema = Schema({Required("type"): name}).extend(schema)

    def wrap(func):
        fetch_builders[name] = FetchBuilder(schema, func)
        return func

    return wrap


transforms = TransformSequence()
transforms.add_validate(FETCH_SCHEMA)


@transforms.add
def process_fetch_job(config, jobs):
    # Converts fetch-url entries to the job schema.
    for job in jobs:
        typ = job["fetch"]["type"]
        name = job["name"]
        fetch = job.pop("fetch")

        if typ not in fetch_builders:
            raise Exception(f"Unknown fetch type {typ} in fetch {name}")
        validate_schema(fetch_builders[typ].schema, fetch, f"In task.fetch {name!r}:")

        job.update(configure_fetch(config, typ, name, fetch))

        yield job


def configure_fetch(config, typ, name, fetch):
    """Validate ``fetch`` against its builder schema and run the builder.

    Returns the builder's partial job configuration. Raises when ``typ``
    has no registered builder.
    """
    if typ not in fetch_builders:
        raise Exception(f"No fetch type {typ} in fetch {name}")
    validate_schema(fetch_builders[typ].schema, fetch, f"In task.fetch {name!r}:")

    return fetch_builders[typ].builder(config, name, fetch)


@transforms.add
def make_task(config, jobs):
    """Turn processed fetch jobs into full task descriptions."""
    # Fetch tasks are idempotent and immutable. Have them live for
    # essentially forever.
    if config.params["level"] == "3":
        expires = "1000 years"
    else:
        expires = "28 days"

    for job in jobs:
        name = job["name"]
        artifact_prefix = job.get("artifact-prefix", "public")
        env = job.get("env", {})
        env.update({"UPLOAD_DIR": "/builds/worker/artifacts"})
        attributes = job.get("attributes", {})
        attributes["artifact_prefix"] = artifact_prefix
        attributes["fetch-artifact"] = mozpath.join(
            artifact_prefix, job["artifact_name"]
        )
        alias = job.get("fetch-alias")
        if alias:
            attributes["fetch-alias"] = alias

        # Tasks that explicitly opt out of caching get short-lived tasks
        # and artifacts instead of the long-lived defaults above.
        task_expires = "28 days" if attributes.get("cached_task") is False else expires
        artifact_expires = (
            "2 days" if attributes.get("cached_task") is False else expires
        )

        task = {
            "attributes": attributes,
            "name": name,
            "description": job["description"],
            "expires-after": task_expires,
            "label": "fetch-%s" % name,
            "run-on-projects": [],
            "run-on-repo-type": job.get("run-on-repo-type", ["git", "hg"]),
            "treeherder": {
                "symbol": join_symbol("Fetch", name),
                "kind": "build",
                "platform": "fetch/opt",
                "tier": 1,
            },
            "run": {
                "using": "run-task",
                "checkout": False,
                "command": job["command"],
            },
            "worker-type": "b-linux",
            "worker": {
                "chain-of-trust": True,
                "docker-image": {"in-tree": job.get("docker-image", "fetch")},
                "env": env,
                "max-run-time": 900,
                "artifacts": [
                    {
                        "type": "directory",
                        "name": artifact_prefix,
                        "path": "/builds/worker/artifacts",
                        "expires-after": artifact_expires,
                    }
                ],
            },
        }

        # Builders that need a taskcluster secret (e.g. an ssh key) get the
        # scope and the taskcluster proxy enabled.
        if job.get("secret", None):
            task["scopes"] = ["secrets:get:" + job.get("secret")]
            task["worker"]["taskcluster-proxy"] = True

        # Fetches that are used for local development need to be built on a
        # level-3 branch to be installable via `mach bootstrap`.
        if attributes.get("local-fetch"):
            task["run-on-projects"] = ["integration", "release"]

        if not taskgraph.fast:
            cache_name = task["label"].replace(f"{config.kind}-", "", 1)

            # This adds the level to the index path automatically.
            add_optimization(
                config,
                task,
                cache_type=CACHE_TYPE,
                cache_name=cache_name,
                digest_data=job["digest_data"],
            )
        yield task


@fetch_builder(
    "static-url",
    schema={
        # The URL to download.
        Required("url"): str,
        # The SHA-256 of the downloaded content.
        Required("sha256"): str,
        # Size of the downloaded entity, in bytes.
        Required("size"): int,
        # GPG signature verification.
        Optional("gpg-signature"): {
            # URL where GPG signature document can be obtained. Can contain the
            # value ``{url}``, which will be substituted with the value from
            # ``url``.
            Required("sig-url"): str,
            # Path to file containing GPG public key(s) used to validate
            # download.
            Required("key-path"): str,
        },
        Optional("headers"): [str],
        # The name to give to the generated artifact. Defaults to the file
        # portion of the URL. Using a different extension converts the
        # archive to the given type. Only conversion to .tar.zst is
        # supported.
        Optional("artifact-name"): str,
        # Strip the given number of path components at the beginning of
        # each file entry in the archive.
        # Requires an artifact-name ending with .tar.zst.
        Optional("strip-components"): int,
        # Add the given prefix to each file entry in the archive.
        # Requires an artifact-name ending with .tar.zst.
        Optional("add-prefix"): str,
        # IMPORTANT: when adding anything that changes the behavior of the task,
        # it is important to update the digest data used to compute cache hits.
    },
)
def create_fetch_url_task(config, name, fetch):
    """Build the fetch-content command for a static URL download."""
    artifact_name = fetch.get("artifact-name")
    if not artifact_name:
        artifact_name = fetch["url"].split("/")[-1]

    command = [
        "/builds/worker/bin/fetch-content",
        "static-url",
    ]

    # Arguments that matter to the cache digest
    args = [
        "--sha256",
        fetch["sha256"],
        "--size",
        "%d" % fetch["size"],
    ]

    if fetch.get("strip-components"):
        args.extend(["--strip-components", "%d" % fetch["strip-components"]])

    if fetch.get("add-prefix"):
        args.extend(["--add-prefix", fetch["add-prefix"]])

    command.extend(args)

    env = {}

    if "gpg-signature" in fetch:
        sig_url = fetch["gpg-signature"]["sig-url"].format(url=fetch["url"])
        key_path = os.path.join(
            gecko_taskgraph.GECKO, fetch["gpg-signature"]["key-path"]
        )

        # The key content is passed via the environment rather than a path
        # so the task doesn't depend on a checkout.
        with open(key_path) as fh:
            gpg_key = fh.read()

        env["FETCH_GPG_KEY"] = gpg_key
        command.extend(
            [
                "--gpg-sig-url",
                sig_url,
                "--gpg-key-env",
                "FETCH_GPG_KEY",
            ]
        )

    for header in fetch.get("headers", []):
        command.extend(["--header", header])

    command.extend(
        [
            fetch["url"],
            "/builds/worker/artifacts/%s" % artifact_name,
        ]
    )

    return {
        "command": command,
        "artifact_name": artifact_name,
        "env": env,
        # We don't include the GPG signature in the digest because it isn't
        # materially important for caching: GPG signatures are supplemental
        # trust checking beyond what the shasum already provides.
        "digest_data": args + [artifact_name],
    }


@fetch_builder(
    "git",
    schema={
        Required("repo"): str,
        Required(Any("revision", "branch")): str,
        Optional("include-dot-git"): bool,
        Optional("artifact-name"): str,
        Optional("path-prefix"): str,
        # ssh-key is a taskcluster secret path (e.g. project/civet/github-deploy-key)
        # In the secret dictionary, the key should be specified as
        # "ssh_privkey": "-----BEGIN OPENSSH PRIVATE KEY-----\nkfksnb3jc..."
        # n.b. The OpenSSH private key file format requires a newline at the end of the file.
        Optional("ssh-key"): str,
    },
)
def create_git_fetch_task(config, name, fetch):
    """Build the fetch-content command to archive a git checkout."""
    path_prefix = fetch.get("path-prefix")
    if not path_prefix:
        # Default to the repository basename.
        path_prefix = fetch["repo"].rstrip("/").rsplit("/", 1)[-1]
    artifact_name = fetch.get("artifact-name")
    if not artifact_name:
        artifact_name = f"{path_prefix}.tar.zst"

    if "revision" in fetch and "branch" in fetch:
        raise Exception("revision and branch cannot be used in the same context")

    revision_or_branch = None

    if "revision" in fetch:
        revision_or_branch = fetch["revision"]
        # fullmatch: the revision must be exactly a 40-hex sha1, not merely
        # start with one (re.match would accept trailing garbage).
        if not re.fullmatch(r"[0-9a-fA-F]{40}", fetch["revision"]):
            raise Exception(f'Revision is not a sha1 in fetch task "{name}"')
    else:
        # we are sure we are dealing with a branch
        revision_or_branch = fetch["branch"]

    args = [
        "/builds/worker/bin/fetch-content",
        "git-checkout-archive",
        "--path-prefix",
        path_prefix,
        fetch["repo"],
        revision_or_branch,
        "/builds/worker/artifacts/%s" % artifact_name,
    ]

    ssh_key = fetch.get("ssh-key")
    if ssh_key:
        args.append("--ssh-key-secret")
        args.append(ssh_key)

    digest_data = [revision_or_branch, path_prefix, artifact_name]
    if fetch.get("include-dot-git", False):
        args.append("--include-dot-git")
        digest_data.append(".git")

    return {
        "command": args,
        "artifact_name": artifact_name,
        "digest_data": digest_data,
        "secret": ssh_key,
    }


@fetch_builder(
    "onnxruntime-deps-fetch",
    schema={
        Required("repo"): str,
        Required("revision"): str,
        Required("artifact-name"): str,
    },
)
def create_onnxruntime_deps_fetch_task(config, name, fetch):
    """Build the command fetching onnxruntime's dependencies via helper script."""
    artifact_name = fetch.get("artifact-name")
    workdir = "/builds/worker"

    script = os.path.join(workdir, "bin/fetch-onnxruntime-deps.sh")
    repo = fetch["repo"]
    revision = fetch["revision"]

    # shell-quote the interpolated values, consistent with the other
    # builders that build `bash -c` command strings below.
    cmd = [
        "bash",
        "-c",
        f"cd {workdir} && /bin/sh {script} "
        f"{shell_quote(repo)} {shell_quote(revision)}",
    ]

    return {
        "command": cmd,
        "artifact_name": artifact_name,
        "docker-image": "fetch-more",
        "digest_data": [
            f"repo={repo}",
            f"revision={revision}",
            f"artifact_name={artifact_name}",
        ],
    }


@fetch_builder(
    "chromium-fetch",
    schema={
        Required("script"): str,
        # Platform type for chromium build
        Required("platform"): str,
        # Chromium revision to obtain
        Optional("revision"): str,
        # The name to give to the generated artifact.
        Required("artifact-name"): str,
    },
)
def create_chromium_fetch_task(config, name, fetch):
    """Build the command that runs the chromium fetch script."""
    artifact_name = fetch.get("artifact-name")

    workdir = "/builds/worker"

    platform = fetch.get("platform")
    revision = fetch.get("revision")

    args = "--platform " + shell_quote(platform)
    if revision:
        args += " --revision " + shell_quote(revision)

    cmd = [
        "bash",
        "-c",
        "cd {} && /usr/bin/python3 {} {}".format(workdir, fetch["script"], args),
    ]

    return {
        "command": cmd,
        "artifact_name": artifact_name,
        "docker-image": "fetch-more",
        "digest_data": [
            f"revision={revision}",
            f"platform={platform}",
            f"artifact_name={artifact_name}",
        ],
    }


@fetch_builder(
    "cft-chromedriver-fetch",
    schema={
        Required("script"): str,
        # Platform type for chromium build
        Required("platform"): str,
        # The name to give to the generated artifact.
        Required("artifact-name"): str,
        # The chrome channel to download from.
        Optional("channel"): str,
        # Determine if we are fetching a backup (stable version - 1) driver.
        Optional("backup"): bool,
        # Pin a stable version of chrome to download from. To be used together with `backup`.
        Optional("version"): str,
    },
)
def create_cft_canary_fetch_task(config, name, fetch):
    """Build the command that fetches a Chrome-for-Testing chromedriver."""
    artifact_name = fetch.get("artifact-name")

    workdir = "/builds/worker"

    platform = fetch.get("platform")
    channel = fetch.get("channel")
    version = fetch.get("version")
    backup = fetch.get("backup", False)

    args = "--platform " + shell_quote(platform)
    if channel:
        args += " --channel " + shell_quote(channel)

    if backup:
        args += " --backup"
        # only allow pinning version with backup
        if version:
            args += " --version " + shell_quote(version)

    cmd = [
        "bash",
        "-c",
        "cd {} && /usr/bin/python3 {} {}".format(workdir, fetch["script"], args),
    ]

    return {
        "command": cmd,
        "artifact_name": artifact_name,
        "docker-image": "fetch-more",
        "digest_data": [
            f"platform={platform}",
            f"artifact_name={artifact_name}",
        ],
    }