tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

bugbug.py (6308B)


      1 # This Source Code Form is subject to the terms of the Mozilla Public
      2 # License, v. 2.0. If a copy of the MPL was not distributed with this
      3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      4 
      5 
      6 import hashlib
      7 import os
      8 import pathlib
      9 import sys
     10 import time
     11 
     12 import requests
     13 from mozbuild.util import memoize
     14 from taskgraph import create
     15 from taskgraph.util import json
     16 from taskgraph.util.taskcluster import requests_retry_session
     17 
     18 try:
     19    # TODO(py3): use time.monotonic()
     20    from time import monotonic
     21 except ImportError:
     22    from time import time as monotonic
     23 
# Base URL of the bugbug service queried for scheduling recommendations.
BUGBUG_BASE_URL = "https://bugbug.moz.tools"
# Total time budget for polling bugbug before timing out.
RETRY_TIMEOUT = 9 * 60  # seconds
# Pause between successive polls of the service.
RETRY_INTERVAL = 10  # seconds

# Preset confidence thresholds.
CT_LOW = 0.7
CT_MEDIUM = 0.8
CT_HIGH = 0.9
     32 
     33 GROUP_TRANSLATIONS = {
     34    "testing/web-platform/tests": "",
     35    "testing/web-platform/mozilla/tests": "/_mozilla",
     36 }
     37 
     38 
     39 def translate_group(group):
     40    for prefix, value in GROUP_TRANSLATIONS.items():
     41        if group.startswith(prefix):
     42            return group.replace(prefix, value)
     43 
     44    return group
     45 
     46 
     47 class BugbugTimeoutException(Exception):
     48    pass
     49 
     50 
     51 @memoize
     52 def get_session():
     53    s = requests.Session()
     54    s.headers.update({"X-API-KEY": "gecko-taskgraph"})
     55    return requests_retry_session(retries=5, session=s)
     56 
     57 
     58 def _perfherder_artifact_path(base_path, perfherder_data):
     59    base_dir = base_path.parent
     60    stem = base_path.stem
     61    sequence = int(time.monotonic() * 1000)
     62    payload = json.dumps(perfherder_data, sort_keys=True).encode("utf-8")
     63    digest = hashlib.sha1(payload).hexdigest()[:8]
     64 
     65    return base_dir / f"{stem}-{sequence}-{digest}.json"
     66 
     67 
     68 def _write_perfherder_data(lower_is_better):
     69    if os.environ.get("MOZ_AUTOMATION", "0") == "1":
     70        perfherder_data = {
     71            "framework": {"name": "build_metrics"},
     72            "suites": [
     73                {
     74                    "name": suite,
     75                    "value": value,
     76                    "lowerIsBetter": True,
     77                    "shouldAlert": False,
     78                    "subtests": [],
     79                }
     80                for suite, value in lower_is_better.items()
     81            ],
     82        }
     83        print(f"PERFHERDER_DATA: {json.dumps(perfherder_data)}", file=sys.stderr)
     84        perfherder_path = os.environ.get("MOZ_PERFHERDER_UPLOAD")
     85        decision_upload_dir = os.environ.get("MOZ_UPLOAD_DIR")
     86        if perfherder_path:
     87            upload_path = pathlib.Path(perfherder_path)
     88        elif decision_upload_dir:
     89            upload_path = (
     90                pathlib.Path(decision_upload_dir) / "perfherder-data-bugbug.json"
     91            )
     92        else:
     93            return
     94 
     95        upload_path.parent.mkdir(parents=True, exist_ok=True)
     96        target = _perfherder_artifact_path(upload_path, perfherder_data)
     97        with target.open("w", encoding="utf-8") as f:
     98            json.dump(perfherder_data, f)
     99 
    100 
@memoize
def push_schedules(branch, rev):
    """Fetch bugbug's scheduling recommendations for a push.

    Polls the bugbug `/push/{branch}/{rev}/schedules` endpoint until it
    stops answering 202 (result still being computed) or the retry budget
    is exhausted, records timing/retry metrics via perfherder, and
    translates group keys with `translate_group`.

    Args:
        branch (str): Repository branch name (e.g. "try").
        rev (str): Revision hash of the push.

    Returns:
        dict: Decoded response payload with "groups"/"config_groups" keys
            translated, or None when running under test-action-callback.

    Raises:
        BugbugTimeoutException: If bugbug still answers 202 after all
            retries.
    """
    # Noop if we're in test-action-callback
    if create.testing:
        return

    url = BUGBUG_BASE_URL + f"/push/{branch}/{rev}/schedules"
    start = monotonic()
    session = get_session()

    # On try there is no fallback and pulling is slower, so we allow bugbug more
    # time to compute the results.
    # See https://github.com/mozilla/bugbug/issues/1673.
    timeout = RETRY_TIMEOUT
    if branch == "try":
        timeout += int(timeout / 3)

    # NOTE: `attempts` is a float and `i` equals the attempt count when the
    # budget is exhausted; that final value feeds the retries metric below,
    # so the loop shape is deliberate.
    attempts = timeout / RETRY_INTERVAL
    i = 0
    while i < attempts:
        r = session.get(url)
        r.raise_for_status()

        # 202 means bugbug is still computing; anything else is final.
        if r.status_code != 202:
            break

        time.sleep(RETRY_INTERVAL)
        i += 1
    end = monotonic()

    _write_perfherder_data(
        lower_is_better={
            "bugbug_push_schedules_time": end - start,
            "bugbug_push_schedules_retries": i,
        }
    )

    data = r.json()
    if r.status_code == 202:
        raise BugbugTimeoutException(f"Timed out waiting for result from '{url}'")

    # Keys arrive as in-tree paths; translate them to wpt group names.
    if "groups" in data:
        data["groups"] = {translate_group(k): v for k, v in data["groups"].items()}

    if "config_groups" in data:
        data["config_groups"] = {
            translate_group(k): v for k, v in data["config_groups"].items()
        }

    return data
    151 
    152 
    153 @memoize
    154 def patch_schedules(base_rev, patch_content, mode="quick"):
    155    """Query BugBug API with a patch to get test recommendations.
    156 
    157    This is used by `./mach test --auto` to get test recommendations for local changes.
    158 
    159    Args:
    160        base_rev (str): The base revision hash.
    161        patch_content (str): The patch content with commit metadata.
    162        mode (str): The mode of test selection, which determines the confidence
    163            threshold. One of 'extensive', 'moderate', or 'quick'.
    164    Returns:
    165        dict: A dictionary with containing test recommendations filtered by
    166            confidence threshold.
    167 
    168    Raises:
    169        BugbugTimeoutException: If the API times out.
    170    """
    171 
    172    import hashlib
    173    import re
    174 
    175    # This ensures consistent hashing across multiple runs with identical
    176    # changes by stripping the date before hashing.
    177    filtered_content = re.sub(r"^Date: .*$", "", patch_content, flags=re.MULTILINE)
    178    patch_hash = hashlib.md5(filtered_content.encode("utf-8")).hexdigest()
    179 
    180    url = BUGBUG_BASE_URL + f"/patch/{base_rev}/{patch_hash}/schedules"
    181 
    182    session = get_session()
    183 
    184    r = session.post(
    185        url,
    186        data=patch_content.encode("utf-8"),
    187        headers={"Content-Type": "text/plain"},
    188    )
    189    r.raise_for_status()
    190 
    191    timeout = RETRY_TIMEOUT
    192    attempts = timeout / RETRY_INTERVAL
    193    i = 0
    194    while i < attempts:
    195        if r.status_code != 202:
    196            break
    197 
    198        time.sleep(RETRY_INTERVAL)
    199        r = session.get(url)
    200        r.raise_for_status()
    201        i += 1
    202 
    203    data = r.json()
    204    if r.status_code == 202:
    205        raise BugbugTimeoutException(f"Timed out waiting for result from '{url}'")
    206 
    207    if mode == "extensive":
    208        confidence_threshold = CT_LOW
    209    elif mode == "moderate":
    210        confidence_threshold = CT_MEDIUM
    211    elif mode == "quick":
    212        confidence_threshold = CT_HIGH
    213    else:
    214        raise ValueError(
    215            f"Invalid mode: '{mode}'; expected one of 'extensive', 'moderate', 'quick'"
    216        )
    217 
    218    return {k: v for k, v in data["groups"].items() if v >= confidence_threshold}