tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

fetch.py (14852B)


      1 # This Source Code Form is subject to the terms of the Mozilla Public
      2 # License, v. 2.0. If a copy of the MPL was not distributed with this
      3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      4 
      5 # Support for running tasks that download remote content and re-export
      6 # it as task artifacts.
      7 
      8 
      9 import os
     10 import re
     11 
     12 import attr
     13 import taskgraph
     14 from mozpack import path as mozpath
     15 from mozshellutil import quote as shell_quote
     16 from taskgraph.transforms.base import TransformSequence
     17 from taskgraph.util.schema import Schema, validate_schema
     18 from taskgraph.util.treeherder import join_symbol
     19 from voluptuous import Any, Extra, Optional, Required
     20 
     21 import gecko_taskgraph
     22 from gecko_taskgraph.transforms.task import task_description_schema
     23 
     24 from ..util.cached_tasks import add_optimization
     25 
     26 CACHE_TYPE = "content.v1"
     27 
# Schema every fetch-kind task definition must satisfy before the
# type-specific builder schema is applied.
FETCH_SCHEMA = Schema({
    # Name of the task.
    Required("name"): str,
    # Relative path (from config.path) to the file the task was defined
    # in.
    Optional("task-from"): str,
    # Description of the task.
    Required("description"): str,
    Optional(
        "fetch-alias",
        description="An alias that can be used instead of the real fetch job name in "
        "fetch stanzas for jobs.",
    ): str,
    Optional(
        "artifact-prefix",
        description="The prefix of the taskcluster artifact being uploaded. "
        "Defaults to `public/`; if it starts with something other than "
        "`public/` the artifact will require scopes to access.",
    ): str,
    # Arbitrary attributes to attach to the generated task.
    Optional("attributes"): {str: object},
    Optional("run-on-repo-type"): task_description_schema["run-on-repo-type"],
    # The fetch stanza itself: `type` selects a registered fetch builder
    # and the remaining keys are validated against that builder's schema.
    Required("fetch"): {
        Required("type"): str,
        Extra: object,
    },
})
     54 
     55 
# Registry of payload builders, keyed by fetch `type`; populated by the
# @fetch_builder decorator below.
fetch_builders = {}
     58 
     59 
@attr.s(frozen=True)
class FetchBuilder:
    """Pairs the voluptuous schema for a fetch type with the callable
    that turns a validated fetch stanza into job configuration."""
    schema = attr.ib(type=Schema)
    builder = attr.ib()
     64 
     65 
     66 def fetch_builder(name, schema):
     67    schema = Schema({Required("type"): name}).extend(schema)
     68 
     69    def wrap(func):
     70        fetch_builders[name] = FetchBuilder(schema, func)
     71        return func
     72 
     73    return wrap
     74 
     75 
# Transform pipeline for this kind; schema validation runs first.
transforms = TransformSequence()
transforms.add_validate(FETCH_SCHEMA)
     78 
     79 
     80 @transforms.add
     81 def process_fetch_job(config, jobs):
     82    # Converts fetch-url entries to the job schema.
     83    for job in jobs:
     84        typ = job["fetch"]["type"]
     85        name = job["name"]
     86        fetch = job.pop("fetch")
     87 
     88        if typ not in fetch_builders:
     89            raise Exception(f"Unknown fetch type {typ} in fetch {name}")
     90        validate_schema(fetch_builders[typ].schema, fetch, f"In task.fetch {name!r}:")
     91 
     92        job.update(configure_fetch(config, typ, name, fetch))
     93 
     94        yield job
     95 
     96 
     97 def configure_fetch(config, typ, name, fetch):
     98    if typ not in fetch_builders:
     99        raise Exception(f"No fetch type {typ} in fetch {name}")
    100    validate_schema(fetch_builders[typ].schema, fetch, f"In task.fetch {name!r}:")
    101 
    102    return fetch_builders[typ].builder(config, name, fetch)
    103 
    104 
    105 @transforms.add
    106 def make_task(config, jobs):
    107    # Fetch tasks are idempotent and immutable. Have them live for
    108    # essentially forever.
    109    if config.params["level"] == "3":
    110        expires = "1000 years"
    111    else:
    112        expires = "28 days"
    113 
    114    for job in jobs:
    115        name = job["name"]
    116        artifact_prefix = job.get("artifact-prefix", "public")
    117        env = job.get("env", {})
    118        env.update({"UPLOAD_DIR": "/builds/worker/artifacts"})
    119        attributes = job.get("attributes", {})
    120        attributes["artifact_prefix"] = artifact_prefix
    121        attributes["fetch-artifact"] = mozpath.join(
    122            artifact_prefix, job["artifact_name"]
    123        )
    124        alias = job.get("fetch-alias")
    125        if alias:
    126            attributes["fetch-alias"] = alias
    127 
    128        task_expires = "28 days" if attributes.get("cached_task") is False else expires
    129        artifact_expires = (
    130            "2 days" if attributes.get("cached_task") is False else expires
    131        )
    132 
    133        task = {
    134            "attributes": attributes,
    135            "name": name,
    136            "description": job["description"],
    137            "expires-after": task_expires,
    138            "label": "fetch-%s" % name,
    139            "run-on-projects": [],
    140            "run-on-repo-type": job.get("run-on-repo-type", ["git", "hg"]),
    141            "treeherder": {
    142                "symbol": join_symbol("Fetch", name),
    143                "kind": "build",
    144                "platform": "fetch/opt",
    145                "tier": 1,
    146            },
    147            "run": {
    148                "using": "run-task",
    149                "checkout": False,
    150                "command": job["command"],
    151            },
    152            "worker-type": "b-linux",
    153            "worker": {
    154                "chain-of-trust": True,
    155                "docker-image": {"in-tree": job.get("docker-image", "fetch")},
    156                "env": env,
    157                "max-run-time": 900,
    158                "artifacts": [
    159                    {
    160                        "type": "directory",
    161                        "name": artifact_prefix,
    162                        "path": "/builds/worker/artifacts",
    163                        "expires-after": artifact_expires,
    164                    }
    165                ],
    166            },
    167        }
    168 
    169        if job.get("secret", None):
    170            task["scopes"] = ["secrets:get:" + job.get("secret")]
    171            task["worker"]["taskcluster-proxy"] = True
    172 
    173        # Fetches that are used for local development need to be built on a
    174        # level-3 branch to be installable via `mach bootstrap`.
    175        if attributes.get("local-fetch"):
    176            task["run-on-projects"] = ["integration", "release"]
    177 
    178        if not taskgraph.fast:
    179            cache_name = task["label"].replace(f"{config.kind}-", "", 1)
    180 
    181            # This adds the level to the index path automatically.
    182            add_optimization(
    183                config,
    184                task,
    185                cache_type=CACHE_TYPE,
    186                cache_name=cache_name,
    187                digest_data=job["digest_data"],
    188            )
    189        yield task
    190 
    191 
    192 @fetch_builder(
    193    "static-url",
    194    schema={
    195        # The URL to download.
    196        Required("url"): str,
    197        # The SHA-256 of the downloaded content.
    198        Required("sha256"): str,
    199        # Size of the downloaded entity, in bytes.
    200        Required("size"): int,
    201        # GPG signature verification.
    202        Optional("gpg-signature"): {
    203            # URL where GPG signature document can be obtained. Can contain the
    204            # value ``{url}``, which will be substituted with the value from
    205            # ``url``.
    206            Required("sig-url"): str,
    207            # Path to file containing GPG public key(s) used to validate
    208            # download.
    209            Required("key-path"): str,
    210        },
    211        Optional("headers"): [str],
    212        # The name to give to the generated artifact. Defaults to the file
    213        # portion of the URL. Using a different extension converts the
    214        # archive to the given type. Only conversion to .tar.zst is
    215        # supported.
    216        Optional("artifact-name"): str,
    217        # Strip the given number of path components at the beginning of
    218        # each file entry in the archive.
    219        # Requires an artifact-name ending with .tar.zst.
    220        Optional("strip-components"): int,
    221        # Add the given prefix to each file entry in the archive.
    222        # Requires an artifact-name ending with .tar.zst.
    223        Optional("add-prefix"): str,
    224        # IMPORTANT: when adding anything that changes the behavior of the task,
    225        # it is important to update the digest data used to compute cache hits.
    226    },
    227 )
    228 def create_fetch_url_task(config, name, fetch):
    229    artifact_name = fetch.get("artifact-name")
    230    if not artifact_name:
    231        artifact_name = fetch["url"].split("/")[-1]
    232 
    233    command = [
    234        "/builds/worker/bin/fetch-content",
    235        "static-url",
    236    ]
    237 
    238    # Arguments that matter to the cache digest
    239    args = [
    240        "--sha256",
    241        fetch["sha256"],
    242        "--size",
    243        "%d" % fetch["size"],
    244    ]
    245 
    246    if fetch.get("strip-components"):
    247        args.extend(["--strip-components", "%d" % fetch["strip-components"]])
    248 
    249    if fetch.get("add-prefix"):
    250        args.extend(["--add-prefix", fetch["add-prefix"]])
    251 
    252    command.extend(args)
    253 
    254    env = {}
    255 
    256    if "gpg-signature" in fetch:
    257        sig_url = fetch["gpg-signature"]["sig-url"].format(url=fetch["url"])
    258        key_path = os.path.join(
    259            gecko_taskgraph.GECKO, fetch["gpg-signature"]["key-path"]
    260        )
    261 
    262        with open(key_path) as fh:
    263            gpg_key = fh.read()
    264 
    265        env["FETCH_GPG_KEY"] = gpg_key
    266        command.extend([
    267            "--gpg-sig-url",
    268            sig_url,
    269            "--gpg-key-env",
    270            "FETCH_GPG_KEY",
    271        ])
    272 
    273    for header in fetch.get("headers", []):
    274        command.extend(["--header", header])
    275 
    276    command.extend([
    277        fetch["url"],
    278        "/builds/worker/artifacts/%s" % artifact_name,
    279    ])
    280 
    281    return {
    282        "command": command,
    283        "artifact_name": artifact_name,
    284        "env": env,
    285        # We don't include the GPG signature in the digest because it isn't
    286        # materially important for caching: GPG signatures are supplemental
    287        # trust checking beyond what the shasum already provides.
    288        "digest_data": args + [artifact_name],
    289    }
    290 
    291 
    292 @fetch_builder(
    293    "git",
    294    schema={
    295        Required("repo"): str,
    296        Required(Any("revision", "branch")): str,
    297        Optional("include-dot-git"): bool,
    298        Optional("artifact-name"): str,
    299        Optional("path-prefix"): str,
    300        # ssh-key is a taskcluster secret path (e.g. project/civet/github-deploy-key)
    301        # In the secret dictionary, the key should be specified as
    302        #  "ssh_privkey": "-----BEGIN OPENSSH PRIVATE KEY-----\nkfksnb3jc..."
    303        # n.b. The OpenSSH private key file format requires a newline at the end of the file.
    304        Optional("ssh-key"): str,
    305    },
    306 )
    307 def create_git_fetch_task(config, name, fetch):
    308    path_prefix = fetch.get("path-prefix")
    309    if not path_prefix:
    310        path_prefix = fetch["repo"].rstrip("/").rsplit("/", 1)[-1]
    311    artifact_name = fetch.get("artifact-name")
    312    if not artifact_name:
    313        artifact_name = f"{path_prefix}.tar.zst"
    314 
    315    if "revision" in fetch and "branch" in fetch:
    316        raise Exception("revision and branch cannot be used in the same context")
    317 
    318    revision_or_branch = None
    319 
    320    if "revision" in fetch:
    321        revision_or_branch = fetch["revision"]
    322        if not re.match(r"[0-9a-fA-F]{40}", fetch["revision"]):
    323            raise Exception(f'Revision is not a sha1 in fetch task "{name}"')
    324    else:
    325        # we are sure we are dealing with a branch
    326        revision_or_branch = fetch["branch"]
    327 
    328    args = [
    329        "/builds/worker/bin/fetch-content",
    330        "git-checkout-archive",
    331        "--path-prefix",
    332        path_prefix,
    333        fetch["repo"],
    334        revision_or_branch,
    335        "/builds/worker/artifacts/%s" % artifact_name,
    336    ]
    337 
    338    ssh_key = fetch.get("ssh-key")
    339    if ssh_key:
    340        args.append("--ssh-key-secret")
    341        args.append(ssh_key)
    342 
    343    digest_data = [revision_or_branch, path_prefix, artifact_name]
    344    if fetch.get("include-dot-git", False):
    345        args.append("--include-dot-git")
    346        digest_data.append(".git")
    347 
    348    return {
    349        "command": args,
    350        "artifact_name": artifact_name,
    351        "digest_data": digest_data,
    352        "secret": ssh_key,
    353    }
    354 
    355 
    356 @fetch_builder(
    357    "onnxruntime-deps-fetch",
    358    schema={
    359        Required("repo"): str,
    360        Required("revision"): str,
    361        Required("artifact-name"): str,
    362    },
    363 )
    364 def create_onnxruntime_deps_fetch_task(config, name, fetch):
    365    artifact_name = fetch.get("artifact-name")
    366    workdir = "/builds/worker"
    367 
    368    script = os.path.join(workdir, "bin/fetch-onnxruntime-deps.sh")
    369    repo = fetch["repo"]
    370    revision = fetch["revision"]
    371 
    372    cmd = ["bash", "-c", f"cd {workdir} && /bin/sh {script} {repo} {revision}"]
    373 
    374    return {
    375        "command": cmd,
    376        "artifact_name": artifact_name,
    377        "docker-image": "fetch-more",
    378        "digest_data": [
    379            f"repo={repo}",
    380            f"revision={revision}",
    381            f"artifact_name={artifact_name}",
    382        ],
    383    }
    384 
    385 
    386 @fetch_builder(
    387    "chromium-fetch",
    388    schema={
    389        Required("script"): str,
    390        # Platform type for chromium build
    391        Required("platform"): str,
    392        # Chromium revision to obtain
    393        Optional("revision"): str,
    394        # The name to give to the generated artifact.
    395        Required("artifact-name"): str,
    396    },
    397 )
    398 def create_chromium_fetch_task(config, name, fetch):
    399    artifact_name = fetch.get("artifact-name")
    400 
    401    workdir = "/builds/worker"
    402 
    403    platform = fetch.get("platform")
    404    revision = fetch.get("revision")
    405 
    406    args = "--platform " + shell_quote(platform)
    407    if revision:
    408        args += " --revision " + shell_quote(revision)
    409 
    410    cmd = [
    411        "bash",
    412        "-c",
    413        "cd {} && /usr/bin/python3 {} {}".format(workdir, fetch["script"], args),
    414    ]
    415 
    416    return {
    417        "command": cmd,
    418        "artifact_name": artifact_name,
    419        "docker-image": "fetch-more",
    420        "digest_data": [
    421            f"revision={revision}",
    422            f"platform={platform}",
    423            f"artifact_name={artifact_name}",
    424        ],
    425    }
    426 
    427 
    428 @fetch_builder(
    429    "cft-chromedriver-fetch",
    430    schema={
    431        Required("script"): str,
    432        # Platform type for chromium build
    433        Required("platform"): str,
    434        # The name to give to the generated artifact.
    435        Required("artifact-name"): str,
    436        # The chrome channel to download from.
    437        Optional("channel"): str,
    438        # Determine if we are fetching a backup (stable version - 1) driver.
    439        Optional("backup"): bool,
    440        # Pin a stable version of chrome to download from. To be used together with `backup`.
    441        Optional("version"): str,
    442    },
    443 )
    444 def create_cft_canary_fetch_task(config, name, fetch):
    445    artifact_name = fetch.get("artifact-name")
    446 
    447    workdir = "/builds/worker"
    448 
    449    platform = fetch.get("platform")
    450    channel = fetch.get("channel")
    451    version = fetch.get("version")
    452    backup = fetch.get("backup", False)
    453 
    454    args = "--platform " + shell_quote(platform)
    455    if channel:
    456        args += " --channel " + shell_quote(channel)
    457 
    458    if backup:
    459        args += " --backup"
    460        # only allow pinning version with backup
    461        if version:
    462            args += " --version " + shell_quote(version)
    463 
    464    cmd = [
    465        "bash",
    466        "-c",
    467        "cd {} && /usr/bin/python3 {} {}".format(workdir, fetch["script"], args),
    468    ]
    469 
    470    return {
    471        "command": cmd,
    472        "artifact_name": artifact_name,
    473        "docker-image": "fetch-more",
    474        "digest_data": [
    475            f"platform={platform}",
    476            f"artifact_name={artifact_name}",
    477        ],
    478    }