fetchlogs.py (3550B)
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""Simple script for downloading structured logs from treeherder.

For the moment this is specialised to work with web-platform-tests
logs; in due course it should move somewhere generic and get hooked
up to mach or similar"""

import argparse
import os
from urllib.parse import urljoin

import requests

treeherder_base = "https://treeherder.mozilla.org/"

# Interpretation of the "job" list from
# https://github.com/mozilla/treeherder-service/blob/master/treeherder/webapp/api/utils.py#L18


def create_parser():
    """Build the command-line parser (positional: branch, commit)."""
    parser = argparse.ArgumentParser()
    parser.add_argument("branch", action="store", help="Branch on which jobs ran")
    parser.add_argument("commit", action="store", help="Commit hash for push")

    return parser


def download(url, prefix, dest, force_suffix=True):
    """Stream *url* into a ``<prefix>[-N].log`` file under *dest*.

    :param url: URL of the log to fetch.
    :param prefix: Filename stem (may be empty/None, in which case the
        name is just the counter).
    :param dest: Destination directory; ``None`` means the current dir.
    :param force_suffix: When True (the default) always append a numeric
        suffix; otherwise only do so if ``<prefix>.log`` already exists.
    """
    if dest is None:
        dest = "."

    if prefix and not force_suffix:
        name = os.path.join(dest, prefix + ".log")
    else:
        name = None
    counter = 0

    # Bump the counter until we find a filename that is not taken.
    while not name or os.path.exists(name):
        counter += 1
        sep = "" if not prefix else "-"
        name = os.path.join(dest, prefix + sep + str(counter) + ".log")

    with open(name, "wb") as f:
        resp = requests.get(url, stream=True)
        for chunk in resp.iter_content(1024):
            f.write(chunk)


def fetch_json(url, params=None):
    """GET *url* expecting JSON; raise ``requests.HTTPError`` on 4xx/5xx."""
    headers = {
        "Accept": "application/json",
        "User-Agent": "wpt-fetchlogs",
    }
    response = requests.get(url=url, params=params, headers=headers, timeout=30)
    response.raise_for_status()
    return response.json()


def get_blobber_url(branch, job):
    """Return the URL of the raw wpt log for *job*, or None if absent.

    *job* is one entry of the treeherder jobs API response; only its
    ``job_guid`` field is used here.
    """
    job_guid = job["job_guid"]
    artifact_url = urljoin(treeherder_base, "/api/jobdetail/")
    artifact_params = {
        "job_guid": job_guid,
    }
    job_data = fetch_json(artifact_url, params=artifact_params)

    if job_data:
        # Best-effort: malformed/unexpected job detail data just means
        # "no log available", so only catch the shape errors that
        # indexing the response can raise.
        try:
            for item in job_data["results"]:
                if item["value"] == "wpt_raw.log" or item["value"] == "log_raw.log":
                    return item["url"]
        except (KeyError, TypeError):
            return None
    return None


def get_structured_logs(branch, commit, dest=None):
    """Download all wpt structured logs for the push *commit* on *branch*.

    Files are written into *dest* (current directory when None), named
    after each job's platform.
    """
    resultset_url = urljoin(treeherder_base, "/api/project/%s/resultset/" % branch)
    resultset_params = {
        "revision": commit,
    }
    revision_data = fetch_json(resultset_url, params=resultset_params)
    result_set = revision_data["results"][0]["id"]

    jobs_url = urljoin(treeherder_base, "/api/project/%s/jobs/" % branch)
    jobs_params = {
        "result_set_id": result_set,
        "count": 2000,
        "exclusion_profile": "false",
    }
    job_data = fetch_json(jobs_url, params=jobs_params)

    tasks = []

    for result in job_data["results"]:
        job_type_name = result["job_type_name"]
        # Match both the buildbot-style ("W3C Web Platform ...") and the
        # taskcluster-style ("test-...-web-platform-tests-...") job names.
        # Parentheses make the original and/or precedence explicit.
        if job_type_name.startswith("W3C Web Platform") or (
            job_type_name.startswith("test-")
            and "-web-platform-tests-" in job_type_name
        ):
            url = get_blobber_url(branch, result)
            if url:
                prefix = result["platform"]  # platform
                tasks.append((url, prefix, dest))

    for task in tasks:
        download(*task)


def main():
    """Entry point: parse arguments and fetch the logs."""
    parser = create_parser()
    args = parser.parse_args()

    get_structured_logs(args.branch, args.commit)


if __name__ == "__main__":
    main()