manifestdownload.py (6447B)
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import os
import tarfile
import traceback
from datetime import datetime, timedelta

import mozversioncontrol
import requests

try:
    from cStringIO import StringIO as BytesIO
except ImportError:
    from io import BytesIO

HEADERS = {"User-Agent": "wpt manifest download"}


def get(logger, url, **kwargs):
    """Issue an HTTP GET for ``url`` and return the ``requests`` response.

    The URL is logged at debug level. When the caller does not pass
    ``headers``, the module-level ``HEADERS`` are used. All other keyword
    arguments are forwarded to ``requests.get`` unchanged.
    """
    logger.debug(url)
    if "headers" not in kwargs:
        kwargs["headers"] = HEADERS
    return requests.get(url, **kwargs)


def abs_path(path):
    """Return ``path`` with ``~`` expanded and converted to an absolute path."""
    return os.path.abspath(os.path.expanduser(path))


def get_commits(logger, repo_root):
    """Return recent commits touching the web-platform test directories.

    Asks the repository object for ``repo_root`` for up to 50 commits that
    follow the two web-platform test directories. Returns an empty list (and
    logs a warning) when ``repo_root`` is not inside a recognised repository.
    """
    try:
        repo = mozversioncontrol.get_repository_object(repo_root)
    except mozversioncontrol.InvalidRepoPath:
        logger.warning("No VCS found for path %s" % repo_root)
        return []

    return repo.get_commits(
        limit=50,
        follow=[
            "testing/web-platform/tests",
            "testing/web-platform/mozilla/tests",
        ],
    )


def should_download(logger, manifest_paths, rebuild_time=timedelta(days=5)):
    """Decide whether the manifests need to be downloaded again.

    Returns True as soon as any path in ``manifest_paths`` is missing, has a
    modification time older than ``rebuild_time``, or is an empty file.
    Otherwise logs that the download is skipped and returns False.
    """
    # TODO: Improve logic for when to download. Maybe if x revisions behind?
    for manifest_path in manifest_paths:
        if not os.path.exists(manifest_path):
            return True
        mtime = datetime.fromtimestamp(os.path.getmtime(manifest_path))
        if mtime < datetime.now() - rebuild_time:
            return True
        if os.path.getsize(manifest_path) == 0:
            return True

    logger.info("Skipping manifest download because existing file is recent")
    return False


def taskcluster_url(logger, commits):
    """Return the URL of the manifest archive best matching ``commits``.

    For each revision in ``commits`` (all-zero revisions are skipped) and each
    known repository, the hg ``json-pushes`` endpoint is queried to map the
    revision to the changeset of its push. If the Taskcluster index has a
    ``manifest-upload`` task for that changeset, the URL of that task's
    manifest artifact is returned.

    The URL of the latest mozilla-central manifest is returned instead when a
    request fails for any reason other than an HTTP 404 from the pushes
    endpoint, or when no revision yields an indexed task.
    """
    artifact_path = "/artifacts/public/manifests.tar.gz"

    repos = {
        "mozilla-central": "mozilla-central",
        "integration/autoland": "autoland",
        "releases/mozilla-esr115": "mozilla-esr115",
        "releases/mozilla-esr128": "mozilla-esr128",
        "releases/mozilla-esr140": "mozilla-esr140",
    }
    cset_url = (
        "https://hg.mozilla.org/{repo}/json-pushes?"
        "changeset={changeset}&version=2&tipsonly=1"
    )

    tc_url = (
        "https://firefox-ci-tc.services.mozilla.com/api/index/v1/"
        "task/gecko.v2.{name}."
        "revision.{changeset}.source.manifest-upload"
    )

    default = (
        "https://firefox-ci-tc.services.mozilla.com/api/index/v1/"
        "task/gecko.v2.mozilla-central.latest.source.manifest-upload" + artifact_path
    )

    # The headers are the same for every pushes lookup, so build them once.
    req_headers = HEADERS.copy()
    req_headers.update({"Accept": "application/json"})

    for revision in commits:
        if revision == 40 * "0":
            continue

        for repo_path, index_name in repos.items():
            # Reset for every lookup: if get() itself raises, the handler
            # below must not inspect the response left over from an earlier
            # iteration (a stale 404 would hide a real connection failure).
            req = None
            try:
                req = get(
                    logger,
                    cset_url.format(changeset=revision, repo=repo_path),
                    headers=req_headers,
                )
                req.raise_for_status()
            except requests.exceptions.RequestException:
                if req is not None and req.status_code == 404:
                    # The API returns a 404 if it can't find a changeset for
                    # the revision.
                    logger.debug("%s not found in %s" % (revision, repo_path))
                    continue
                else:
                    return default

            try:
                result = req.json()
            except ValueError:
                # A body that is not valid JSON cannot name a push; try the
                # next repository rather than aborting the whole lookup.
                logger.debug("Error reading response; body is not valid JSON")
                continue

            # .get() so that a response lacking the key takes the logged path
            # below instead of raising KeyError.
            pushes = result.get("pushes")
            if not pushes:
                logger.debug("Error reading response; 'pushes' key not found")
                continue
            [cset] = next(iter(pushes.values()))["changesets"]

            tc_index_url = tc_url.format(changeset=cset, name=index_name)
            try:
                req = get(logger, tc_index_url)
            except requests.exceptions.RequestException:
                return default

            if req.status_code == 200:
                return tc_index_url + artifact_path

    logger.info(
        "Can't find a commit-specific manifest so just using the most recent one"
    )

    return default


def download_manifest(logger, test_paths, commits_func, url_func, force=False):
    """Download a pregenerated manifest archive and unpack the manifests.

    :param logger: Logger used for progress and failure messages.
    :param test_paths: Mapping whose values are either dicts with
        ``manifest_path`` and ``manifest_rel_path`` keys or objects exposing
        attributes of the same names.
    :param commits_func: Zero-argument callable returning the commits that are
        handed to ``url_func``.
    :param url_func: Callable taking ``(logger, commits)`` and returning the
        archive URL, or a falsy value when none is available.
    :param force: Download even when ``should_download`` says the existing
        manifests are recent enough.
    :returns: True when the download was skipped or the archive was processed;
        False when no URL was found, the download failed, or the archive could
        not be opened or unpacked. A manifest missing from the archive is
        logged as a warning but does not cause a False return.
    """
    manifest_paths = [
        (item["manifest_path"] if isinstance(item, dict) else item.manifest_path)
        for item in test_paths.values()
    ]

    if not force and not should_download(logger, manifest_paths):
        return True

    commits = commits_func()

    url = url_func(logger, commits)
    if not url:
        logger.warning("No generated manifest found")
        return False

    logger.info("Downloading manifest from %s" % url)
    try:
        req = get(logger, url)
    except Exception:
        logger.warning("Downloading pregenerated manifest failed")
        return False

    if req.status_code != 200:
        logger.warning(
            "Downloading pregenerated manifest failed; got "
            "HTTP status %d" % req.status_code
        )
        return False

    try:
        tar = tarfile.open(mode="r:gz", fileobj=BytesIO(req.content))
    except (tarfile.TarError, OSError, EOFError):
        # A corrupt or truncated download is reported like every other
        # failure in this function instead of propagating to the caller.
        logger.warning(
            "Failed to open downloaded manifest archive:\n%s" % traceback.format_exc()
        )
        return False

    # The context manager closes the archive on every exit path, including
    # the early "return False" below.
    with tar:
        for paths in test_paths.values():
            manifest_rel_path = (
                paths["manifest_rel_path"]
                if isinstance(paths, dict)
                else paths.manifest_rel_path
            )
            manifest_path = (
                paths["manifest_path"]
                if isinstance(paths, dict)
                else paths.manifest_path
            )

            try:
                # Archive member names always use "/" as the separator.
                member = tar.getmember(manifest_rel_path.replace(os.path.sep, "/"))
            except KeyError:
                logger.warning(
                    "Failed to find downloaded manifest %s" % manifest_rel_path
                )
                continue

            try:
                logger.debug("Unpacking %s to %s" % (member.name, manifest_path))
                src = tar.extractfile(member)
                if src is None:
                    # extractfile() returns None for members that are not
                    # regular files, so there is no manifest data to unpack.
                    logger.warning(
                        "Failed to find downloaded manifest %s" % manifest_rel_path
                    )
                    continue
                try:
                    # Read everything before opening the destination so that a
                    # failed read does not truncate an existing manifest.
                    data = src.read()
                finally:
                    src.close()
                with open(manifest_path, "wb") as dest:
                    dest.write(data)
            except (OSError, EOFError, tarfile.TarError):
                logger.warning(
                    "Failed to decompress %s:\n%s"
                    % (manifest_rel_path, traceback.format_exc())
                )
                return False

            # Set the file's access and modification times to now so that
            # should_download() measures age from this download.
            os.utime(manifest_path, None)

    return True


def download_from_taskcluster(logger, repo_root, test_paths, force=False):
    """Download manifests for ``test_paths`` using Taskcluster artifacts.

    Calls ``download_manifest`` with commits taken from the repository at
    ``repo_root`` and with ``taskcluster_url`` as the URL resolver.
    """
    return download_manifest(
        logger,
        test_paths,
        lambda: get_commits(logger, repo_root),
        taskcluster_url,
        force,
    )