tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

bugbug.py (11575B)


      1 # This Source Code Form is subject to the terms of the Mozilla Public
      2 # License, v. 2.0. If a copy of the MPL was not distributed with this
      3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      4 
      5 
      6 from collections import defaultdict
      7 from fnmatch import fnmatch
      8 
      9 from requests.exceptions import RetryError
     10 from taskgraph.optimize.base import OptimizationStrategy, register_strategy, registry
     11 
     12 from gecko_taskgraph.util.bugbug import (
     13    CT_HIGH,
     14    CT_LOW,
     15    CT_MEDIUM,
     16    BugbugTimeoutException,
     17    push_schedules,
     18 )
     19 from gecko_taskgraph.util.hg import get_push_data
     20 
# Name of the registered optimization strategy that the "*-fallback*" bugbug
# strategies below delegate to once a bugbug query has raised
# BugbugTimeoutException or RetryError.
FALLBACK = "skip-unless-has-relevant-tests"
     22 
     23 
     24 def merge_bugbug_replies(data, new_data):
     25    """Merge a bugbug reply (stored in the `new_data` argument) into another (stored
     26    in the `data` argument).
     27    """
     28    for key, value in new_data.items():
     29        if isinstance(value, dict):
     30            if key not in data:
     31                data[key] = {}
     32 
     33            if len(value) == 0:
     34                continue
     35 
     36            dict_value = next(iter(value.values()))
     37            if isinstance(dict_value, list):
     38                for name, configs in value.items():
     39                    if name not in data[key]:
     40                        data[key][name] = set()
     41 
     42                    data[key][name].update(configs)
     43            else:
     44                for name, confidence in value.items():
     45                    if name not in data[key] or data[key][name] < confidence:
     46                        data[key][name] = confidence
     47        elif isinstance(value, list):
     48            if key not in data:
     49                data[key] = set()
     50 
     51            data[key].update(value)
     52 
     53 
     54 @register_strategy("bugbug-low", args=(CT_LOW,))
     55 @register_strategy("bugbug-medium", args=(CT_MEDIUM,))
     56 @register_strategy("bugbug-high", args=(CT_HIGH,))
     57 @register_strategy("bugbug-tasks-medium", args=(CT_MEDIUM, True))
     58 @register_strategy("bugbug-tasks-high", args=(CT_HIGH, True))
     59 @register_strategy("bugbug-reduced", args=(CT_MEDIUM, True, True))
     60 @register_strategy("bugbug-reduced-fallback", args=(CT_MEDIUM, True, True, FALLBACK))
     61 @register_strategy("bugbug-reduced-high", args=(CT_HIGH, True, True))
     62 @register_strategy("bugbug-reduced-manifests", args=(CT_MEDIUM, False, True))
     63 @register_strategy(
     64    "bugbug-reduced-manifests-config-selection-low",
     65    args=(CT_LOW, False, True, None, 1, True),
     66 )
     67 @register_strategy(
     68    "bugbug-reduced-manifests-config-selection",
     69    args=(CT_MEDIUM, False, True, None, 1, True),
     70 )
     71 @register_strategy(
     72    "bugbug-reduced-manifests-fallback-low", args=(CT_LOW, False, True, FALLBACK)
     73 )
     74 @register_strategy(
     75    "bugbug-reduced-manifests-fallback", args=(CT_MEDIUM, False, True, FALLBACK)
     76 )
     77 @register_strategy(
     78    "bugbug-reduced-manifests-fallback-last-10-pushes",
     79    args=(0.3, False, True, FALLBACK, 10),
     80 )
     81 class BugBugPushSchedules(OptimizationStrategy):
     82    """Query the 'bugbug' service to retrieve relevant tasks and manifests.
     83 
     84    Args:
     85        confidence_threshold (float): The minimum confidence threshold (in
     86            range [0, 1]) needed for a task to be scheduled.
     87        tasks_only (bool): Whether or not to only use tasks and no groups
     88            (default: False)
     89        use_reduced_tasks (bool): Whether or not to use the reduced set of tasks
     90            provided by the bugbug service (default: False).
     91        fallback (str): The fallback strategy to use if there
     92            was a failure in bugbug (default: None)
     93        num_pushes (int): The number of pushes to consider for the selection
     94            (default: 1).
     95        select_configs (bool): Whether to select configurations for manifests
     96            too (default: False).
     97    """
     98 
     99    def __init__(
    100        self,
    101        confidence_threshold,
    102        tasks_only=False,
    103        use_reduced_tasks=False,
    104        fallback=None,
    105        num_pushes=1,
    106        select_configs=False,
    107    ):
    108        self.confidence_threshold = confidence_threshold
    109        self.use_reduced_tasks = use_reduced_tasks
    110        self.fallback = fallback
    111        self.tasks_only = tasks_only
    112        self.num_pushes = num_pushes
    113        self.select_configs = select_configs
    114        self.timedout = False
    115 
    116    def should_remove_task(self, task, params, importance):
    117        project = params["project"]
    118 
    119        if project not in ("autoland", "try"):
    120            return False
    121 
    122        current_push_id = int(params["pushlog_id"])
    123 
    124        rev = params["head_rev"]
    125 
    126        if self.timedout:
    127            return registry[self.fallback].should_remove_task(task, params, importance)
    128 
    129        data = {}
    130 
    131        start_push_id = current_push_id - self.num_pushes + 1
    132        if self.num_pushes != 1:
    133            push_data = get_push_data(
    134                params["head_repository"], project, start_push_id, current_push_id - 1
    135            )
    136 
    137        for push_id in range(start_push_id, current_push_id + 1):
    138            if push_id == current_push_id:
    139                rev = params["head_rev"]
    140            else:
    141                rev = push_data[push_id]["changesets"][-1]
    142 
    143            try:
    144                new_data = push_schedules(params["project"], rev)
    145                merge_bugbug_replies(data, new_data)
    146            except (BugbugTimeoutException, RetryError):
    147                if not self.fallback:
    148                    raise
    149 
    150                self.timedout = True
    151                return self.should_remove_task(task, params, importance)
    152 
    153        key = "reduced_tasks" if self.use_reduced_tasks else "tasks"
    154        tasks = {
    155            task
    156            for task, confidence in data.get(key, {}).items()
    157            if confidence >= self.confidence_threshold
    158        }
    159 
    160        test_manifests = task.attributes.get("test_manifests")
    161        if test_manifests is None or self.tasks_only:
    162            if data.get("known_tasks") and task.label not in data["known_tasks"]:
    163                return False
    164 
    165            if task.label not in tasks:
    166                return True
    167 
    168            return False
    169 
    170        # If a task contains more than one group, use the max confidence.
    171        groups = data.get("groups", {})
    172        confidences = [c for g, c in groups.items() if g in test_manifests]
    173        if not confidences or max(confidences) < self.confidence_threshold:
    174            return True
    175 
    176        # If the task configuration doesn't match the ones selected by bugbug for
    177        # the manifests, optimize out.
    178        if self.select_configs:
    179            selected_groups = [
    180                g
    181                for g, c in groups.items()
    182                if g in test_manifests and c > self.confidence_threshold
    183            ]
    184 
    185            config_groups = data.get("config_groups", defaultdict(list))
    186 
    187            # Configurations returned by bugbug are in a format such as
    188            # `test-windows10-64/opt-*-e10s`, while task labels are like
    189            # test-windows10-64-qr/opt-mochitest-browser-chrome-e10s-6.
    190            # In order to match the strings, we need to ignore the chunk number
    191            # from the task label.
    192            parts = task.label.split("-")
    193            label_without_chunk_number = "-".join(
    194                parts[:-1] if parts[-1].isdigit() else parts
    195            )
    196 
    197            if not any(
    198                fnmatch(label_without_chunk_number, config)
    199                for group in selected_groups
    200                for config in config_groups[group]
    201            ):
    202                return True
    203 
    204        # Store group importance so future optimizers can access it.
    205        for manifest in test_manifests:
    206            if manifest not in groups:
    207                continue
    208 
    209            confidence = groups[manifest]
    210            if confidence >= CT_HIGH:
    211                importance[manifest] = "high"
    212            elif confidence >= CT_MEDIUM:
    213                importance[manifest] = "medium"
    214            elif confidence >= CT_LOW:
    215                importance[manifest] = "low"
    216            else:
    217                importance[manifest] = "lowest"
    218 
    219        return False
    220 
    221 
    222 @register_strategy("platform-debug")
    223 class SkipUnlessDebug(OptimizationStrategy):
    224    """Only run debug platforms."""
    225 
    226    def should_remove_task(self, task, params, arg):
    227        return (
    228            "build_type" in task.attributes and task.attributes["build_type"] != "debug"
    229        )
    230 
    231 
    232 @register_strategy("platform-disperse")
    233 @register_strategy("platform-disperse-no-unseen", args=(None, 0))
    234 @register_strategy(
    235    "platform-disperse-only-one",
    236    args=(
    237        {
    238            "high": 1,
    239            "medium": 1,
    240            "low": 1,
    241            "lowest": 0,
    242        },
    243        0,
    244    ),
    245 )
    246 class DisperseGroups(OptimizationStrategy):
    247    """Disperse groups across test configs.
    248 
    249    Each task has an associated 'importance' dict passed in via the arg. This
    250    is of the form `{<group>: <importance>}`.
    251 
    252    Where 'group' is a test group id (usually a path to a manifest), and 'importance' is
    253    one of `{'lowest', 'low', 'medium', 'high'}`.
    254 
    255    Each importance value has an associated 'count' as defined in
    256    `self.target_counts`. It guarantees that 'manifest' will run in at least
    257    'count' different configurations (assuming there are enough tasks
    258    containing 'manifest').
    259 
    260    On configurations that haven't been seen before, we'll increase the target
    261    count by `self.unseen_modifier` to increase the likelihood of scheduling a
    262    task on that configuration.
    263 
    264    Args:
    265        target_counts (dict): Override DEFAULT_TARGET_COUNTS with custom counts. This
    266            is a dict mapping the importance value ('lowest', 'low', etc) to the
    267            minimum number of configurations manifests with this value should run
    268            on.
    269 
    270        unseen_modifier (int): Override DEFAULT_UNSEEN_MODIFIER to a custom
    271            value. This is the amount we'll increase 'target_count' by for unseen
    272            configurations.
    273    """
    274 
    275    DEFAULT_TARGET_COUNTS = {
    276        "high": 3,
    277        "medium": 2,
    278        "low": 1,
    279        "lowest": 0,
    280    }
    281    DEFAULT_UNSEEN_MODIFIER = 1
    282 
    283    def __init__(self, target_counts=None, unseen_modifier=DEFAULT_UNSEEN_MODIFIER):
    284        self.target_counts = self.DEFAULT_TARGET_COUNTS.copy()
    285        if target_counts:
    286            self.target_counts.update(target_counts)
    287        self.unseen_modifier = unseen_modifier
    288 
    289        self.count = defaultdict(int)
    290        self.seen_configurations = set()
    291 
    292    def should_remove_task(self, task, params, importance):
    293        test_manifests = task.attributes.get("test_manifests")
    294        test_platform = task.attributes.get("test_platform")
    295 
    296        if not importance or not test_manifests or not test_platform:
    297            return False
    298 
    299        # Build the test configuration key.
    300        key = test_platform
    301        if variant := task.attributes.get("unittest_variant"):
    302            key += "-" + variant
    303 
    304        important_manifests = set(test_manifests) & set(importance)
    305        for manifest in important_manifests:
    306            target_count = self.target_counts[importance[manifest]]
    307 
    308            # If this configuration hasn't been seen before, increase the
    309            # likelihood of scheduling the task.
    310            if key not in self.seen_configurations:
    311                target_count += self.unseen_modifier
    312 
    313            if self.count[manifest] < target_count:
    314                # Update manifest counts and seen configurations.
    315                self.seen_configurations.add(key)
    316                for manifest in important_manifests:
    317                    self.count[manifest] += 1
    318                return False
    319 
    320        # Should remove task because all manifests have reached their
    321        # importance count (or there were no important manifests).
    322        return True