download.py (5883B)
import argparse
import bz2
import gzip
import json
import io
import os
import zlib
from datetime import datetime, timedelta
from http.client import HTTPException
from typing import Any, Callable, List, Optional, Text, cast
from urllib.request import urlopen

try:
    import zstandard
except ImportError:
    # zstandard is optional; a falsy placeholder lets callers test `if zstandard`.
    zstandard = cast(Any, None)

from .utils import git

from . import log


here = os.path.dirname(__file__)

wpt_root = os.path.abspath(os.path.join(here, os.pardir, os.pardir))
logger = log.get_logger()


def abs_path(path: Text) -> Text:
    """Return `path` with `~` expanded and converted to an absolute path."""
    return os.path.abspath(os.path.expanduser(path))


def should_download(manifest_path: Text, rebuild_time: timedelta = timedelta(days=5)) -> bool:
    """Decide whether the manifest at `manifest_path` needs to be downloaded.

    Returns True when the file does not exist or its modification time is
    older than `rebuild_time`; otherwise logs that the download is skipped
    and returns False.
    """
    if not os.path.exists(manifest_path):
        return True
    mtime = datetime.fromtimestamp(os.path.getmtime(manifest_path))
    if mtime < datetime.now() - rebuild_time:
        return True
    logger.info("Skipping manifest download because existing file is recent")
    return False


def merge_pr_tags(repo_root: Text, max_count: int = 50) -> List[Text]:
    """Collect `merge_pr_*` tag names from the last `max_count` log entries.

    Returns an empty list when no git callable is available for `repo_root`.
    Tags are returned in the order `git log` emits them.
    """
    gitfunc = git(repo_root)
    tags: List[Text] = []
    if gitfunc is None:
        return tags
    for line in gitfunc("log", "--format=%D", "--max-count=%s" % max_count).split("\n"):
        for ref in line.split(", "):
            if ref.startswith("tag: merge_pr_"):
                # Drop the leading "tag: " (5 characters), keeping the tag name.
                tags.append(ref[5:])
    return tags


def score_name(name: Text) -> Optional[int]:
    """Score how much we like each filename, lower wins, None rejects"""

    # Accept both ways of naming the manifest asset, even though
    # there's no longer a reason to include the commit sha.
    if name.startswith("MANIFEST-") or name.startswith("MANIFEST."):
        # zstd assets are only usable when the optional module was imported.
        if zstandard and name.endswith("json.zst"):
            return 1
        if name.endswith(".json.bz2"):
            return 2
        if name.endswith(".json.gz"):
            return 3
    return None


def github_url(tags: List[Text]) -> Optional[List[Text]]:
    """Return manifest asset download URLs for the first usable release tag.

    Each tag in `tags` is looked up via the GitHub releases API. Tags whose
    request fails or returns a non-200 status are skipped. For the first tag
    that yields a JSON release description, the URLs of the assets accepted
    by `score_name` are returned, best-scored first (possibly an empty list).

    Returns None when the response body is not valid JSON or when no tag
    could be fetched.
    """
    for tag in tags:
        url = "https://api.github.com/repos/web-platform-tests/wpt/releases/tags/%s" % tag
        try:
            resp = urlopen(url)
        except Exception:
            logger.warning("Fetching %s failed" % url)
            continue

        # The context manager guarantees the connection is released on every
        # exit path (continue, return and exceptions alike).
        with resp:
            if resp.code != 200:
                logger.warning("Fetching %s failed; got HTTP status %d" % (url, resp.code))
                continue

            try:
                # Read through the response object rather than its underlying
                # `fp`, so the documented `read()` interface is used.
                release = json.load(resp)
            except ValueError:
                logger.warning("Response was not valid JSON")
                return None

        candidates = []
        for item in release["assets"]:
            score = score_name(item["name"])
            if score is not None:
                candidates.append((score, item["browser_download_url"]))

        return [item[1] for item in sorted(candidates)]

    return None


def _decompress_manifest(url: Text, payload: bytes) -> Optional[bytes]:
    """Decompress `payload` using the codec implied by the extension of `url`.

    Returns the decompressed bytes, or None when the extension is unknown,
    the required codec is unavailable, or the data is corrupt or truncated.
    """
    if url.endswith(".zst"):
        if not zstandard:
            # Optional dependency missing: skip this candidate quietly.
            return None
        try:
            return cast(bytes, zstandard.ZstdDecompressor().decompress(payload))
        except (OSError, zstandard.ZstdError):
            # zstandard reports bad frames with its own ZstdError, which is
            # not an OSError.
            logger.warning("Failed to decompress downloaded file")
            return None

    if url.endswith(".bz2"):
        try:
            return bz2.decompress(payload)
        except (OSError, ValueError):
            # OSError: invalid stream; ValueError: stream ended prematurely.
            logger.warning("Failed to decompress downloaded file")
            return None

    if url.endswith(".gz"):
        try:
            with gzip.GzipFile(fileobj=io.BytesIO(payload)) as gzf:
                return gzf.read()
        except (OSError, EOFError, zlib.error):
            # OSError covers a bad header/CRC, EOFError a truncated file and
            # zlib.error a corrupt deflate stream.
            logger.warning("Failed to decompress downloaded file")
            return None

    logger.warning("Unknown file extension: %s" % url)
    return None


def download_manifest(
        manifest_path: Text,
        tags_func: Callable[[], List[Text]],
        url_func: Callable[[List[Text]], Optional[List[Text]]],
        force: bool = False
) -> bool:
    """Download a pregenerated manifest and write it to `manifest_path`.

    :param manifest_path: Destination file for the decompressed manifest.
    :param tags_func: Callable returning the candidate release tags.
    :param url_func: Callable mapping those tags to candidate download URLs
                     (or None / an empty list when nothing is available).
    :param force: Download even if `should_download` says the existing file
                  is recent enough.
    :returns: True if a manifest was downloaded and written; False if the
              download was skipped, no candidate could be fetched and
              decompressed, or writing the file failed.
    """
    if not force and not should_download(manifest_path):
        return False

    tags = tags_func()

    urls = url_func(tags)
    if not urls:
        logger.warning("No generated manifest found")
        return False

    decompressed: Optional[bytes] = None
    for url in urls:
        logger.info("Downloading manifest from %s" % url)
        try:
            resp = urlopen(url)
        except Exception:
            logger.warning("Downloading pregenerated manifest failed")
            continue

        # Close the response on every path out of this iteration.
        with resp:
            if resp.code != 200:
                logger.warning("Downloading pregenerated manifest failed; got HTTP status %d" %
                               resp.code)
                continue

            try:
                payload = resp.read()
            except (OSError, HTTPException):
                # An interrupted transfer should fall through to the next
                # candidate rather than abort the whole download.
                logger.warning("Downloading pregenerated manifest failed")
                continue

        decompressed = _decompress_manifest(url, payload)
        if decompressed is not None:
            break

    if decompressed is None:
        # Every candidate failed to download or decompress.
        return False

    try:
        with open(manifest_path, "wb") as f:
            f.write(decompressed)
    except Exception:
        logger.warning("Failed to write manifest")
        return False
    logger.info("Manifest downloaded")
    return True


def create_parser() -> argparse.ArgumentParser:
    """Build the command-line parser (`--path`, `--tests-root`, `--force`)."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-p", "--path", type=abs_path, help="Path to manifest file.")
    parser.add_argument(
        "--tests-root", type=abs_path, default=wpt_root, help="Path to root of tests.")
    parser.add_argument(
        "--force", action="store_true",
        help="Always download, even if the existing manifest is recent")
    return parser


def download_from_github(path: Text, tests_root: Text, force: bool = False) -> bool:
    """Download the manifest to `path` using the merge_pr tags of `tests_root`.

    Returns the result of `download_manifest`.
    """
    return download_manifest(path, lambda: merge_pr_tags(tests_root), github_url,
                             force=force)


def run(**kwargs: Any) -> int:
    """Entry point taking the parsed options as keyword arguments.

    Reads `path`, `tests_root` and `force` from `kwargs`; when `path` is None
    the manifest is written to `MANIFEST.json` under `tests_root`.

    Returns 0 if a manifest was downloaded and 1 otherwise (including when
    the download was skipped because the existing file is recent).
    """
    if kwargs["path"] is None:
        path = os.path.join(kwargs["tests_root"], "MANIFEST.json")
    else:
        path = kwargs["path"]
    success = download_from_github(path, kwargs["tests_root"], kwargs["force"])
    return 0 if success else 1