download.py (3768B)
# mypy: allow-untyped-defs

"""Download wpt report artifacts for completed Taskcluster runs of a ref."""

import argparse
import logging
import os

import requests

import github


logging.basicConfig()
logger = logging.getLogger("tc-download")
# basicConfig() alone leaves the effective level at WARNING, which silently
# swallowed the logger.info() calls that report downloaded paths.
logger.setLevel(logging.INFO)

# The root URL of the Taskcluster deployment from which to download wpt reports
# (after https://bugzilla.mozilla.org/show_bug.cgi?id=1574668 lands, this will
# be https://community-tc.services.mozilla.com)
TASKCLUSTER_ROOT_URL = 'https://taskcluster.net'


def get_parser():
    """Build the command-line argument parser for this script."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--ref", default="master",
                        help="Branch (in the GitHub repository) or commit to fetch logs for")
    parser.add_argument("--artifact-name", default="wpt_report.json.gz",
                        help="Log type to fetch")
    parser.add_argument("--repo-name", default="web-platform-tests/wpt",
                        help="GitHub repo name in the format owner/repo. "
                        "This must be the repo from which the Taskcluster run was scheduled "
                        "(for PRs this is the repo into which the PR would merge)")
    parser.add_argument("--token-file", help="File containing GitHub token")
    parser.add_argument("--out-dir", default=".",
                        help="Path to save the logfiles")
    return parser


def get_json(url, key=None):
    """GET *url*, raise for HTTP errors, and return the decoded JSON body.

    If *key* is given, return only that key of the top-level JSON object.
    """
    resp = requests.get(url)
    resp.raise_for_status()
    data = resp.json()
    if key:
        data = data[key]
    return data


def get(url, dest, name):
    """Download *url* into directory *dest* as file *name*; return the path.

    Raises requests.HTTPError on a non-2xx response.
    """
    resp = requests.get(url)
    resp.raise_for_status()
    path = os.path.join(dest, name)
    # Artifacts (e.g. wpt_report.json.gz) are binary and resp.content is
    # bytes, so the file must be opened in binary mode.  The previous
    # text-mode open ("w") raised TypeError on write.
    with open(path, "wb") as f:
        f.write(resp.content)
    return path


def run(*args, **kwargs):
    """Download matching artifacts for every completed Taskcluster run.

    Keyword arguments mirror the CLI options: ref, artifact_name, repo_name,
    token_file, out_dir.  Returns 1 when no completed Taskcluster runs are
    found for the ref, None otherwise.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + mkdir.
    os.makedirs(kwargs["out_dir"], exist_ok=True)

    if kwargs["token_file"]:
        with open(kwargs["token_file"]) as f:
            gh = github.Github(f.read().strip())
    else:
        # Anonymous access: works but with a much lower API rate limit.
        gh = github.Github()

    repo = gh.get_repo(kwargs["repo_name"])
    commit = repo.get_commit(kwargs["ref"])
    statuses = commit.get_statuses()
    taskgroups = set()

    for status in statuses:
        if not status.context.startswith("Taskcluster "):
            continue
        if status.state == "pending":
            continue
        # The taskgroup id is the final path component of the status URL.
        taskgroup_id = status.target_url.rsplit("/", 1)[1]
        taskgroups.add(taskgroup_id)

    if not taskgroups:
        logger.error("No complete Taskcluster runs found for ref %s", kwargs["ref"])
        return 1

    for taskgroup in taskgroups:
        if TASKCLUSTER_ROOT_URL == 'https://taskcluster.net':
            # NOTE: this condition can be removed after November 9, 2019
            taskgroup_url = "https://queue.taskcluster.net/v1/task-group/%s/list"
            artifacts_list_url = "https://queue.taskcluster.net/v1/task/%s/artifacts"
        else:
            taskgroup_url = TASKCLUSTER_ROOT_URL + "/api/queue/v1/task-group/%s/list"
            artifacts_list_url = TASKCLUSTER_ROOT_URL + "/api/queue/v1/task/%s/artifacts"
        tasks = get_json(taskgroup_url % taskgroup, "tasks")
        for task in tasks:
            task_id = task["status"]["taskId"]
            url = artifacts_list_url % (task_id,)
            for artifact in get_json(url, "artifacts"):
                if artifact["name"].endswith(kwargs["artifact_name"]):
                    filename = "%s-%s-%s" % (task["task"]["metadata"]["name"],
                                             task_id,
                                             kwargs["artifact_name"])
                    path = get("%s/%s" % (url, artifact["name"]),
                               kwargs["out_dir"], filename)
                    logger.info(path)


def main():
    """CLI entry point: parse arguments and run the downloader."""
    kwargs = get_parser().parse_args()

    # vars(kwargs) must be unpacked: run() reads its options from **kwargs,
    # so passing the dict positionally (as before) made run()'s kwargs empty
    # and kwargs["out_dir"] raised KeyError.
    run(None, **vars(kwargs))


if __name__ == "__main__":
    main()  # type: ignore