tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

funsize.py (16538B)


      1 #!/usr/bin/env python3
      2 # This Source Code Form is subject to the terms of the Mozilla Public
      3 # License, v. 2.0. If a copy of the MPL was not distributed with this
      4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      5 
      6 import argparse
      7 import asyncio
      8 import configparser
      9 import json
     10 import logging
     11 import os
     12 import shutil
     13 import tempfile
     14 import time
     15 from contextlib import AsyncExitStack
     16 from pathlib import Path
     17 
     18 import aiohttp
     19 from mardor.reader import MarReader
     20 from mardor.signing import get_keysize
     21 from scriptworker.utils import get_hash, retry_async
     22 
log = logging.getLogger(__name__)


# Taskcluster deployment root; overridable via the environment for
# non-default (e.g. staging) deployments.
ROOT_URL = os.environ.get(
    "TASKCLUSTER_ROOT_URL", "https://firefox-ci-tc.services.mozilla.com"
)
# Base of the Taskcluster queue API; task-artifact URLs start with this.
QUEUE_PREFIX = f"{ROOT_URL}/api/queue/"
# MAR downloads must come from one of these prefixes (enforced by
# verify_allowed_url in download_and_verify_mars).
ALLOWED_URL_PREFIXES = (
    "http://download.cdn.mozilla.net/pub/mozilla.org/firefox/nightly/",
    "http://download.cdn.mozilla.net/pub/firefox/nightly/",
    "http://ftp.mozilla.org/",
    "http://download.mozilla.org/",
    "https://archive.mozilla.org/",
    "http://archive.mozilla.org/",
    QUEUE_PREFIX,
)
# Additional prefixes accepted only when --allow-staging-prefixes is given.
STAGING_URL_PREFIXES = (
    "http://ftp.stage.mozaws.net/",
    "https://ftp.stage.mozaws.net/",
)

# Per-architecture flags exported as the BCJ_OPTIONS env var for the
# make_incremental_update.sh toolchain (see generate_partial).
# "--x86" presumably selects the x86 BCJ filter for diff compression —
# TODO confirm against the update-packaging scripts.
BCJ_OPTIONS = {
    "x86": ["--x86"],
    "x86_64": ["--x86"],
    "aarch64": [],
    # macOS Universal Builds
    "macos-x86_64-aarch64": [],
}
     51 
     52 
     53 def strtobool(value: str):
     54    # Copied from `mach.util` since this script runs outside of a mach environment
     55    # Reimplementation of distutils.util.strtobool
     56    # https://docs.python.org/3.9/distutils/apiref.html#distutils.util.strtobool
     57    true_vals = ("y", "yes", "t", "true", "on", "1")
     58    false_vals = ("n", "no", "f", "false", "off", "0")
     59 
     60    value = value.lower()
     61    if value in true_vals:
     62        return 1
     63    if value in false_vals:
     64        return 0
     65 
     66    raise ValueError(f"Expected one of: {', '.join(true_vals + false_vals)}")
     67 
     68 
     69 def verify_signature(mar, cert):
     70    log.info("Checking %s signature", mar)
     71    with open(mar, "rb") as mar_fh:
     72        m = MarReader(mar_fh)
     73        if not m.verify(verify_key=cert):
     74            raise ValueError(
     75                "MAR Signature invalid: %s (%s) against %s", mar, m.signature_type, cert
     76            )
     77 
     78 
     79 def process_arguments():
     80    parser = argparse.ArgumentParser()
     81    parser.add_argument("--artifacts-dir", required=True)
     82    parser.add_argument("--signing-cert", type=argparse.FileType("rb"), required=True)
     83    parser.add_argument("--task-definition", required=True, type=argparse.FileType("r"))
     84    parser.add_argument(
     85        "--allow-staging-prefixes",
     86        action="store_true",
     87        default=strtobool(os.environ.get("FUNSIZE_ALLOW_STAGING_PREFIXES", "false")),
     88        help="Allow files from staging buckets.",
     89    )
     90    parser.add_argument(
     91        "-q",
     92        "--quiet",
     93        dest="log_level",
     94        action="store_const",
     95        const=logging.INFO,
     96        default=logging.DEBUG,
     97    )
     98    parser.add_argument(
     99        "--arch",
    100        type=str,
    101        required=True,
    102        choices=BCJ_OPTIONS.keys(),
    103        help="The archtecture you are building.",
    104    )
    105    return parser.parse_args()
    106 
    107 
    108 def validate_mar_channel_id(mar, channel_ids):
    109    log.info("Checking %s for MAR_CHANNEL_ID %s", mar, channel_ids)
    110    # We may get a string with a list representation, or a single entry string.
    111    channel_ids = set(channel_ids.split(","))
    112 
    113    product_info = MarReader(open(mar, "rb")).productinfo
    114    if not isinstance(product_info, tuple):
    115        raise ValueError(f"Malformed product information in mar: {product_info}")
    116 
    117    found_channel_ids = set(product_info[1].split(","))
    118 
    119    if not found_channel_ids.issubset(channel_ids):
    120        raise ValueError(
    121            f"MAR_CHANNEL_ID mismatch, {product_info[1]} not in {channel_ids}"
    122        )
    123 
    124    log.info("%s channel %s in %s", mar, product_info[1], channel_ids)
    125 
    126 
    127 async def retry_download(*args, semaphore=None, **kwargs):  # noqa: E999
    128    """Retry download() calls."""
    129    async with AsyncExitStack() as stack:
    130        if semaphore:
    131            await stack.enter_async_context(semaphore)
    132        await retry_async(
    133            download,
    134            retry_exceptions=(aiohttp.ClientError, asyncio.TimeoutError),
    135            args=args,
    136            kwargs=kwargs,
    137        )
    138 
    139 
    140 def verify_allowed_url(mar, allowed_url_prefixes):
    141    if not any(mar.startswith(prefix) for prefix in allowed_url_prefixes):
    142        raise ValueError(
    143            f"{mar} is not in allowed URL prefixes: {allowed_url_prefixes}"
    144        )
    145 
    146 
    147 async def download(url, dest, mode=None):  # noqa: E999
    148    log.info("Downloading %s to %s", url, dest)
    149    chunk_size = 4096
    150    bytes_downloaded = 0
    151    async with aiohttp.ClientSession(raise_for_status=True) as session:
    152        start = time.time()
    153        async with session.get(url, timeout=120) as resp:
    154            # Additional early logging for download timeouts.
    155            log.debug("Fetching from url %s", resp.url)
    156            for history in resp.history:
    157                log.debug("Redirection history: %s", history.url)
    158            log.debug("Headers for %s: %s", resp.url, resp.headers)
    159            if "Content-Length" in resp.headers:
    160                log.debug(
    161                    "Content-Length expected for %s: %s",
    162                    url,
    163                    resp.headers["Content-Length"],
    164                )
    165            log_interval = chunk_size * 1024
    166            with open(dest, "wb") as fd:
    167                while True:
    168                    chunk = await resp.content.read(chunk_size)
    169                    if not chunk:
    170                        break
    171                    fd.write(chunk)
    172                    bytes_downloaded += len(chunk)
    173                    log_interval -= len(chunk)
    174                    if log_interval <= 0:
    175                        log.debug("Bytes downloaded for %s: %d", url, bytes_downloaded)
    176                        log_interval = chunk_size * 1024
    177            end = time.time()
    178            log.info(
    179                "Downloaded %s, %s bytes in %s seconds: sha256:%s",
    180                url,
    181                bytes_downloaded,
    182                int(end - start),
    183                get_hash(dest, hash_alg="sha256"),
    184            )
    185            if mode:
    186                log.info("chmod %o %s", mode, dest)
    187                os.chmod(dest, mode)
    188 
    189 
    190 async def download_buildsystem_bits(partials_config, downloads, tools_dir):
    191    """Download external tools needed to make partials."""
    192 
    193    # We're making the assumption that the "to" mar is the same for all,
    194    # as that's the way this task is currently used.
    195    to_url = extract_download_urls(partials_config, mar_type="to").pop()
    196 
    197    repo = get_option(
    198        downloads[to_url]["extracted_path"],
    199        filename="platform.ini",
    200        section="Build",
    201        option="SourceRepository",
    202    )
    203    revision = get_option(
    204        downloads[to_url]["extracted_path"],
    205        filename="platform.ini",
    206        section="Build",
    207        option="SourceStamp",
    208    )
    209 
    210    urls = {
    211        "make_incremental_update.sh": f"{repo}/raw-file/{revision}/tools/"
    212        "update-packaging/make_incremental_update.sh",
    213        "common.sh": f"{repo}/raw-file/{revision}/tools/update-packaging/common.sh",
    214        "mar": "https://archive.mozilla.org/pub/mozilla.org/firefox/nightly/"
    215        "latest-mozilla-central/mar-tools/linux64/mar",
    216        "mbsdiff": "https://archive.mozilla.org/pub/mozilla.org/firefox/nightly/"
    217        "latest-mozilla-central/mar-tools/linux64/mbsdiff",
    218    }
    219    for filename, url in urls.items():
    220        filename = tools_dir / filename
    221        await retry_download(url, dest=filename, mode=0o755)
    222 
    223 
    224 def find_file(directory, filename):
    225    log.debug("Searching for %s in %s", filename, directory)
    226    return next(Path(directory).rglob(filename))
    227 
    228 
    229 def get_option(directory, filename, section, option):
    230    log.info("Extracting [%s]: %s from %s/**/%s", section, option, directory, filename)
    231    f = find_file(directory, filename)
    232    config = configparser.ConfigParser()
    233    config.read(f)
    234    rv = config.get(section, option)
    235    log.info("Found %s", rv)
    236    return rv
    237 
    238 
    239 def extract_download_urls(partials_config, mar_type):
    240    """Extract a set of urls to download from the task configuration.
    241 
    242    mar_type should be one of "from", "to"
    243    """
    244    return {definition[f"{mar_type}_mar"] for definition in partials_config}
    245 
    246 
async def download_and_verify_mars(partials_config, allowed_url_prefixes, signing_cert):
    """Download, check signature, channel ID and unpack MAR files.

    Returns a dict keyed by URL, each entry holding "download_path" (the
    fetched .mar in its own temp dir) and "extracted_path" (the unpacked
    tree). Raises ValueError on a disallowed URL, a bad signature, or a
    MAR_CHANNEL_ID mismatch.
    """
    # Separate these categories so we can opt to perform checks on only 'to' downloads.
    from_urls = extract_download_urls(partials_config, mar_type="from")
    to_urls = extract_download_urls(partials_config, mar_type="to")
    tasks = list()
    downloads = dict()

    semaphore = asyncio.Semaphore(2)  # Magic 2 to reduce network timeout errors.
    for url in from_urls.union(to_urls):
        verify_allowed_url(url, allowed_url_prefixes)
        # Each MAR gets its own temp dir so equal basenames cannot collide.
        downloads[url] = {
            "download_path": Path(tempfile.mkdtemp()) / Path(url).name,
        }
        tasks.append(
            retry_download(url, downloads[url]["download_path"], semaphore=semaphore)
        )

    # All downloads run concurrently (bounded by the semaphore) before any
    # verification starts.
    await asyncio.gather(*tasks)

    for url in downloads:
        # Verify signature, but not from an artifact as we don't
        # depend on the signing task
        if not os.getenv("MOZ_DISABLE_MAR_CERT_VERIFICATION") and not url.startswith(
            QUEUE_PREFIX
        ):
            verify_signature(downloads[url]["download_path"], signing_cert)

        # Only validate the target channel ID, as we update from beta->release
        if url in to_urls:
            validate_mar_channel_id(
                downloads[url]["download_path"], os.environ["MAR_CHANNEL_ID"]
            )

        # Unpack only after the checks above have passed.
        downloads[url]["extracted_path"] = tempfile.mkdtemp()
        with open(downloads[url]["download_path"], "rb") as mar_fh:
            log.info(
                "Unpacking %s into %s",
                downloads[url]["download_path"],
                downloads[url]["extracted_path"],
            )
            m = MarReader(mar_fh)
            m.extract(downloads[url]["extracted_path"])

    return downloads
    292 
    293 
async def run_command(cmd, cwd="/", env=None, label=None, silent=False):
    """Run `cmd` through the shell, streaming stdout/stderr into the log.

    `label` prefixes every logged output line; `silent` skips output logging
    entirely. Waits for the process to exit; the exit code is not checked.
    """
    log.info("Running: %s", cmd)
    if not env:
        # An explicit empty env: the child does NOT inherit os.environ
        # unless the caller passes one (generate_partial does).
        env = dict()
    process = await asyncio.create_subprocess_shell(
        cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
        cwd=cwd,
        env=env,
    )
    if label:
        label = f"{label}: "
    else:
        label = ""

    async def read_output(stream, label, printcmd):
        # Forward one line at a time until EOF (empty read).
        while True:
            line = await stream.readline()
            if line == b"":
                break
            printcmd("%s%s", label, line.decode("utf-8").rstrip())

    if silent:
        # NOTE(review): the silent path never drains the PIPEs; a very
        # chatty command could fill the pipe buffer and deadlock — confirm
        # silent is only used for quiet commands.
        await process.wait()
    else:
        # Drain both streams concurrently before waiting, so neither pipe
        # can block the child.
        await asyncio.gather(
            read_output(process.stdout, label, log.info),
            read_output(process.stderr, label, log.warning),
        )
        await process.wait()
    324        await process.wait()
    325 
    326 
    327 async def generate_partial(from_dir, to_dir, dest_mar, mar_data, tools_dir, arch):
    328    log.info("Generating partial %s", dest_mar)
    329    env = os.environ.copy()
    330    env["LC_ALL"] = "C"
    331    env["MAR"] = tools_dir / "mar"
    332    env["MBSDIFF"] = tools_dir / "mbsdiff"
    333    if arch:
    334        env["BCJ_OPTIONS"] = " ".join(BCJ_OPTIONS[arch])
    335    env["MOZ_PRODUCT_VERSION"] = mar_data["version"]
    336    env["MAR_CHANNEL_ID"] = mar_data["MAR_CHANNEL_ID"]
    337    env["BRANCH"] = mar_data["branch"]
    338 
    339    make_incremental_update = tools_dir / "make_incremental_update.sh"
    340    cmd = f"{make_incremental_update} {dest_mar} {from_dir} {to_dir}"
    341 
    342    await run_command(cmd, cwd=dest_mar.parent, env=env, label=dest_mar.name)
    343    validate_mar_channel_id(dest_mar, mar_data["MAR_CHANNEL_ID"])
    344 
    345 
async def manage_partial(
    partial_def, artifacts_dir, tools_dir, downloads, semaphore, arch=None
):
    """Build one partial MAR described by `partial_def`; return its manifest entry.

    `downloads` is the dict from download_and_verify_mars, providing the
    unpacked "from"/"to" trees and the downloaded .mar paths. `semaphore`
    bounds how many partials generate concurrently.
    """
    from_url = partial_def["from_mar"]
    to_url = partial_def["to_mar"]
    from_path = downloads[from_url]["extracted_path"]
    to_path = downloads[to_url]["extracted_path"]

    # Metadata for the manifest and for generate_partial's environment.
    mar_data = {
        "MAR_CHANNEL_ID": os.environ["MAR_CHANNEL_ID"],
        "version": get_option(
            to_path, filename="application.ini", section="App", option="Version"
        ),
        "appName": get_option(
            from_path, filename="application.ini", section="App", option="Name"
        ),
        # Use Gecko repo and rev from platform.ini, not application.ini
        "repo": get_option(
            to_path, filename="platform.ini", section="Build", option="SourceRepository"
        ),
        "revision": get_option(
            to_path, filename="platform.ini", section="Build", option="SourceStamp"
        ),
        "locale": partial_def["locale"],
        "from_mar": partial_def["from_mar"],
        "from_size": os.path.getsize(downloads[from_url]["download_path"]),
        "from_hash": get_hash(downloads[from_url]["download_path"], hash_alg="sha512"),
        "from_buildid": get_option(
            from_path, filename="application.ini", section="App", option="BuildID"
        ),
        "to_mar": partial_def["to_mar"],
        "to_size": os.path.getsize(downloads[to_url]["download_path"]),
        "to_hash": get_hash(downloads[to_url]["download_path"], hash_alg="sha512"),
        "to_buildid": get_option(
            to_path, filename="application.ini", section="App", option="BuildID"
        ),
        "mar": partial_def["dest_mar"],
    }
    # if branch not set explicitly use repo-name
    mar_data["branch"] = partial_def.get("branch", Path(mar_data["repo"]).name)

    # Copy through optional balrog-style fields when the task supplies them.
    for field in (
        "update_number",
        "previousVersion",
        "previousBuildNumber",
        "toVersion",
        "toBuildNumber",
    ):
        if field in partial_def:
            mar_data[field] = partial_def[field]

    dest_mar = Path(artifacts_dir) / mar_data["mar"]

    async with semaphore:
        await generate_partial(from_path, to_path, dest_mar, mar_data, tools_dir, arch)

    # Record the finished artifact's size and hash for the manifest.
    mar_data["size"] = os.path.getsize(dest_mar)
    mar_data["hash"] = get_hash(dest_mar, hash_alg="sha512")
    return mar_data
    405 
    406 
    407 async def async_main(args, signing_cert):
    408    tasks = []
    409 
    410    allowed_url_prefixes = list(ALLOWED_URL_PREFIXES)
    411    if args.allow_staging_prefixes:
    412        allowed_url_prefixes += STAGING_URL_PREFIXES
    413 
    414    task = json.load(args.task_definition)
    415 
    416    downloads = await download_and_verify_mars(
    417        task["extra"]["funsize"]["partials"], allowed_url_prefixes, signing_cert
    418    )
    419 
    420    tools_dir = Path(tempfile.mkdtemp())
    421    await download_buildsystem_bits(
    422        partials_config=task["extra"]["funsize"]["partials"],
    423        downloads=downloads,
    424        tools_dir=tools_dir,
    425    )
    426 
    427    # May want to consider os.cpu_count() if we ever run on osx/win.
    428    # sched_getaffinity is the list of cores we can run on, not the total.
    429    semaphore = asyncio.Semaphore(len(os.sched_getaffinity(0)))
    430    for definition in task["extra"]["funsize"]["partials"]:
    431        tasks.append(
    432            asyncio.ensure_future(
    433                retry_async(
    434                    manage_partial,
    435                    retry_exceptions=(aiohttp.ClientError, asyncio.TimeoutError),
    436                    kwargs=dict(
    437                        partial_def=definition,
    438                        artifacts_dir=args.artifacts_dir,
    439                        tools_dir=tools_dir,
    440                        arch=args.arch,
    441                        downloads=downloads,
    442                        semaphore=semaphore,
    443                    ),
    444                )
    445            )
    446        )
    447    manifest = await asyncio.gather(*tasks)
    448 
    449    for url in downloads:
    450        downloads[url]["download_path"].unlink()
    451        shutil.rmtree(downloads[url]["extracted_path"])
    452    shutil.rmtree(tools_dir)
    453 
    454    return manifest
    455 
    456 
    457 def main():
    458    args = process_arguments()
    459 
    460    logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
    461    log.setLevel(args.log_level)
    462 
    463    signing_cert = args.signing_cert.read()
    464    assert get_keysize(signing_cert) == 4096
    465 
    466    artifacts_dir = Path(args.artifacts_dir)
    467    if not artifacts_dir.exists():
    468        artifacts_dir.mkdir()
    469 
    470    loop = asyncio.get_event_loop()
    471    manifest = loop.run_until_complete(async_main(args, signing_cert))
    472    loop.close()
    473 
    474    manifest_file = artifacts_dir / "manifest.json"
    475    with open(manifest_file, "w") as fp:
    476        json.dump(manifest, fp, indent=2, sort_keys=True)
    477 
    478    log.debug(f"{json.dumps(manifest, indent=2, sort_keys=True)}")
    479 
    480 
    481 if __name__ == "__main__":
    482    main()