tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

manifestdownload.py (6447B)


      1 # This Source Code Form is subject to the terms of the Mozilla Public
      2 # License, v. 2.0. If a copy of the MPL was not distributed with this
      3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      4 
      5 import os
      6 import tarfile
      7 from datetime import datetime, timedelta
      8 
      9 import mozversioncontrol
     10 import requests
     11 
     12 try:
     13    from cStringIO import StringIO as BytesIO
     14 except ImportError:
     15    from io import BytesIO
     16 
# Default HTTP request headers: get() sends these when the caller passes no
# "headers" keyword, and taskcluster_url() extends a copy of them.
HEADERS = {"User-Agent": "wpt manifest download"}
     18 
     19 
     20 def get(logger, url, **kwargs):
     21    logger.debug(url)
     22    if "headers" not in kwargs:
     23        kwargs["headers"] = HEADERS
     24    return requests.get(url, **kwargs)
     25 
     26 
     27 def abs_path(path):
     28    return os.path.abspath(os.path.expanduser(path))
     29 
     30 
     31 def get_commits(logger, repo_root):
     32    try:
     33        repo = mozversioncontrol.get_repository_object(repo_root)
     34    except mozversioncontrol.InvalidRepoPath:
     35        logger.warning("No VCS found for path %s" % repo_root)
     36        return []
     37 
     38    return repo.get_commits(
     39        limit=50,
     40        follow=[
     41            "testing/web-platform/tests",
     42            "testing/web-platform/mozilla/tests",
     43        ],
     44    )
     45 
     46 
     47 def should_download(logger, manifest_paths, rebuild_time=timedelta(days=5)):
     48    # TODO: Improve logic for when to download. Maybe if x revisions behind?
     49    for manifest_path in manifest_paths:
     50        if not os.path.exists(manifest_path):
     51            return True
     52        mtime = datetime.fromtimestamp(os.path.getmtime(manifest_path))
     53        if mtime < datetime.now() - rebuild_time:
     54            return True
     55        if os.path.getsize(manifest_path) == 0:
     56            return True
     57 
     58    logger.info("Skipping manifest download because existing file is recent")
     59    return False
     60 
     61 
     62 def taskcluster_url(logger, commits):
     63    artifact_path = "/artifacts/public/manifests.tar.gz"
     64 
     65    repos = {
     66        "mozilla-central": "mozilla-central",
     67        "integration/autoland": "autoland",
     68        "releases/mozilla-esr115": "mozilla-esr115",
     69        "releases/mozilla-esr128": "mozilla-esr128",
     70        "releases/mozilla-esr140": "mozilla-esr140",
     71    }
     72    cset_url = (
     73        "https://hg.mozilla.org/{repo}/json-pushes?"
     74        "changeset={changeset}&version=2&tipsonly=1"
     75    )
     76 
     77    tc_url = (
     78        "https://firefox-ci-tc.services.mozilla.com/api/index/v1/"
     79        "task/gecko.v2.{name}."
     80        "revision.{changeset}.source.manifest-upload"
     81    )
     82 
     83    default = (
     84        "https://firefox-ci-tc.services.mozilla.com/api/index/v1/"
     85        "task/gecko.v2.mozilla-central.latest.source.manifest-upload" + artifact_path
     86    )
     87 
     88    for revision in commits:
     89        req = None
     90 
     91        if revision == 40 * "0":
     92            continue
     93 
     94        for repo_path, index_name in repos.items():
     95            try:
     96                req_headers = HEADERS.copy()
     97                req_headers.update({"Accept": "application/json"})
     98                req = get(
     99                    logger,
    100                    cset_url.format(changeset=revision, repo=repo_path),
    101                    headers=req_headers,
    102                )
    103                req.raise_for_status()
    104            except requests.exceptions.RequestException:
    105                if req is not None and req.status_code == 404:
    106                    # The API returns a 404 if it can't find a changeset for the revision.
    107                    logger.debug("%s not found in %s" % (revision, repo_path))
    108                    continue
    109                else:
    110                    return default
    111 
    112            result = req.json()
    113 
    114            pushes = result["pushes"]
    115            if not pushes:
    116                logger.debug("Error reading response; 'pushes' key not found")
    117                continue
    118            [cset] = next(iter(pushes.values()))["changesets"]
    119 
    120            tc_index_url = tc_url.format(changeset=cset, name=index_name)
    121            try:
    122                req = get(logger, tc_index_url)
    123            except requests.exceptions.RequestException:
    124                return default
    125 
    126            if req.status_code == 200:
    127                return tc_index_url + artifact_path
    128 
    129    logger.info(
    130        "Can't find a commit-specific manifest so just using the most recent one"
    131    )
    132 
    133    return default
    134 
    135 
    136 def download_manifest(logger, test_paths, commits_func, url_func, force=False):
    137    manifest_paths = [
    138        (item["manifest_path"] if isinstance(item, dict) else item.manifest_path)
    139        for item in test_paths.values()
    140    ]
    141 
    142    if not force and not should_download(logger, manifest_paths):
    143        return True
    144 
    145    commits = commits_func()
    146 
    147    url = url_func(logger, commits)
    148    if not url:
    149        logger.warning("No generated manifest found")
    150        return False
    151 
    152    logger.info("Downloading manifest from %s" % url)
    153    try:
    154        req = get(logger, url)
    155    except Exception:
    156        logger.warning("Downloading pregenerated manifest failed")
    157        return False
    158 
    159    if req.status_code != 200:
    160        logger.warning(
    161            "Downloading pregenerated manifest failed; got "
    162            "HTTP status %d" % req.status_code
    163        )
    164        return False
    165 
    166    tar = tarfile.open(mode="r:gz", fileobj=BytesIO(req.content))
    167    for paths in test_paths.values():
    168        manifest_rel_path = (
    169            paths["manifest_rel_path"]
    170            if isinstance(paths, dict)
    171            else paths.manifest_rel_path
    172        )
    173        manifest_path = (
    174            paths["manifest_path"] if isinstance(paths, dict) else paths.manifest_path
    175        )
    176 
    177        try:
    178            member = tar.getmember(manifest_rel_path.replace(os.path.sep, "/"))
    179        except KeyError:
    180            logger.warning("Failed to find downloaded manifest %s" % manifest_rel_path)
    181        else:
    182            try:
    183                logger.debug("Unpacking %s to %s" % (member.name, manifest_path))
    184                src = tar.extractfile(member)
    185                with open(manifest_path, "wb") as dest:
    186                    dest.write(src.read())
    187                src.close()
    188            except OSError:
    189                import traceback
    190 
    191                logger.warning(
    192                    "Failed to decompress %s:\n%s"
    193                    % (manifest_rel_path, traceback.format_exc())
    194                )
    195                return False
    196 
    197        os.utime(manifest_path, None)
    198 
    199    return True
    200 
    201 
    202 def download_from_taskcluster(logger, repo_root, test_paths, force=False):
    203    return download_manifest(
    204        logger,
    205        test_paths,
    206        lambda: get_commits(logger, repo_root),
    207        taskcluster_url,
    208        force,
    209    )