tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

fetchlogs.py (3550B)


      1 # This Source Code Form is subject to the terms of the Mozilla Public
      2 # License, v. 2.0. If a copy of the MPL was not distributed with this
      3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      4 
      5 import argparse
      6 import os
      7 
      8 import requests
      9 import urlparse
     10 
     11 treeherder_base = "https://treeherder.mozilla.org/"
     12 
     13 """Simple script for downloading structured logs from treeherder.
     14 
     15 For the moment this is specialised to work with web-platform-tests
     16 logs; in due course it should move somewhere generic and get hooked
     17 up to mach or similar"""
     18 
     19 # Interpretation of the "job" list from
     20 # https://github.com/mozilla/treeherder-service/blob/master/treeherder/webapp/api/utils.py#L18
     21 
     22 
     23 def create_parser():
     24    parser = argparse.ArgumentParser()
     25    parser.add_argument("branch", action="store", help="Branch on which jobs ran")
     26    parser.add_argument("commit", action="store", help="Commit hash for push")
     27 
     28    return parser
     29 
     30 
     31 def download(url, prefix, dest, force_suffix=True):
     32    if dest is None:
     33        dest = "."
     34 
     35    if prefix and not force_suffix:
     36        name = os.path.join(dest, prefix + ".log")
     37    else:
     38        name = None
     39    counter = 0
     40 
     41    while not name or os.path.exists(name):
     42        counter += 1
     43        sep = "" if not prefix else "-"
     44        name = os.path.join(dest, prefix + sep + str(counter) + ".log")
     45 
     46    with open(name, "wb") as f:
     47        resp = requests.get(url, stream=True)
     48        for chunk in resp.iter_content(1024):
     49            f.write(chunk)
     50 
     51 
     52 def fetch_json(url, params=None):
     53    headers = {
     54        "Accept": "application/json",
     55        "User-Agent": "wpt-fetchlogs",
     56    }
     57    response = requests.get(url=url, params=params, headers=headers, timeout=30)
     58    response.raise_for_status()
     59    return response.json()
     60 
     61 
     62 def get_blobber_url(branch, job):
     63    job_guid = job["job_guid"]
     64    artifact_url = urlparse.urljoin(treeherder_base, "/api/jobdetail/")
     65    artifact_params = {
     66        "job_guid": job_guid,
     67    }
     68    job_data = fetch_json(artifact_url, params=artifact_params)
     69 
     70    if job_data:
     71        try:
     72            for item in job_data["results"]:
     73                if item["value"] == "wpt_raw.log" or item["value"] == "log_raw.log":
     74                    return item["url"]
     75        except Exception:
     76            return None
     77 
     78 
     79 def get_structured_logs(branch, commit, dest=None):
     80    resultset_url = urlparse.urljoin(
     81        treeherder_base, "/api/project/%s/resultset/" % branch
     82    )
     83    resultset_params = {
     84        "revision": commit,
     85    }
     86    revision_data = fetch_json(resultset_url, params=resultset_params)
     87    result_set = revision_data["results"][0]["id"]
     88 
     89    jobs_url = urlparse.urljoin(treeherder_base, "/api/project/%s/jobs/" % branch)
     90    jobs_params = {
     91        "result_set_id": result_set,
     92        "count": 2000,
     93        "exclusion_profile": "false",
     94    }
     95    job_data = fetch_json(jobs_url, params=jobs_params)
     96 
     97    tasks = []
     98 
     99    for result in job_data["results"]:
    100        job_type_name = result["job_type_name"]
    101        if (
    102            job_type_name.startswith("W3C Web Platform")
    103            or job_type_name.startswith("test-")
    104            and "-web-platform-tests-" in job_type_name
    105        ):
    106            url = get_blobber_url(branch, result)
    107            if url:
    108                prefix = result["platform"]  # platform
    109                tasks.append((url, prefix, None))
    110 
    111    for task in tasks:
    112        download(*task)
    113 
    114 
    115 def main():
    116    parser = create_parser()
    117    args = parser.parse_args()
    118 
    119    get_structured_logs(args.branch, args.commit)
    120 
    121 
# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()