tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

chunk.py (11911B)


      1 # This Source Code Form is subject to the terms of the Mozilla Public
      2 # License, v. 2.0. If a copy of the MPL was not distributed with this
      3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      4 
      5 import taskgraph
      6 from taskgraph.transforms.base import TransformSequence
      7 from taskgraph.util import json
      8 from taskgraph.util.attributes import keymatch
      9 from taskgraph.util.copy import deepcopy
     10 from taskgraph.util.treeherder import join_symbol, split_symbol
     11 
     12 from gecko_taskgraph.util.attributes import is_try
     13 from gecko_taskgraph.util.chunking import (
     14    WPT_SUBSUITES,
     15    DefaultLoader,
     16    chunk_manifests,
     17    get_manifest_loader,
     18    get_runtimes,
     19    get_test_tags,
     20    guess_mozinfo_from_task,
     21 )
     22 from gecko_taskgraph.util.perfile import perfile_number_of_chunks
     23 
DYNAMIC_CHUNK_DURATION = 20 * 60  # seconds
"""The approximate time each test chunk should take to run."""


DYNAMIC_CHUNK_MULTIPLIER = {
    # Desktop xpcshell tests run in parallel. Reduce the total runtime to
    # compensate.
    "^(?!android).*-xpcshell.*": 0.2,
}
"""A multiplication factor to tweak the total duration per platform / suite."""

# Sequence the chunking transforms below register themselves on via
# @transforms.add; the kind loader runs them in registration order.
transforms = TransformSequence()
     36 
     37 
     38 @transforms.add
     39 def set_test_verify_chunks(config, tasks):
     40    """Set the number of chunks we use for test-verify."""
     41    for task in tasks:
     42        if any(task["suite"].startswith(s) for s in ("test-verify", "test-coverage")):
     43            env = config.params.get("try_task_config", {}) or {}
     44            env = env.get("templates", {}).get("env", {})
     45            task["chunks"] = perfile_number_of_chunks(
     46                is_try(config.params),
     47                env.get("MOZHARNESS_TEST_PATHS", ""),
     48                frozenset(config.params["files_changed"]),
     49                task["test-name"],
     50            )
     51 
     52            # limit the number of chunks we run for test-verify mode because
     53            # test-verify is comprehensive and takes a lot of time, if we have
     54            # >30 tests changed, this is probably an import of external tests,
     55            # or a patch renaming/moving files in bulk
     56            maximum_number_verify_chunks = 3
     57            task["chunks"] = min(task["chunks"], maximum_number_verify_chunks)
     58 
     59        yield task
     60 
     61 
     62 @transforms.add
def set_test_manifests(config, tasks):
    """Determine the set of test manifests that should run in this task.

    For each task this either: passes it through untouched (runtime test
    resolution, taskgraph.fast, or pre-supplied manifests), resolves the
    manifest set via a loader, or drops the task entirely when test-path /
    test-tag filtering leaves it with nothing to run.
    """

    for task in tasks:
        # When a task explicitly requests no 'test_manifest_loader', test
        # resolving will happen at test runtime rather than in the taskgraph.
        if "test-manifest-loader" in task and task["test-manifest-loader"] is None:
            yield task
            continue

        # Set 'tests_grouped' to "1", so we can differentiate between suites that are
        # chunked at the test runtime and those that are chunked in the taskgraph.
        task.setdefault("tags", {})["tests_grouped"] = "1"

        if taskgraph.fast:
            # We want to avoid evaluating manifests when taskgraph.fast is set. But
            # manifests are required for dynamic chunking. Just set the number of
            # chunks to one in this case.
            if task["chunks"] == "dynamic":
                task["chunks"] = 1
            yield task
            continue

        # Manifests may already be supplied by the task definition; normalize
        # a bare list into the {"active": ..., "skipped": ...} dict shape used
        # by the rest of this transform chain.
        manifests = task.get("test-manifests")
        if manifests:
            if isinstance(manifests, list):
                task["test-manifests"] = {"active": manifests, "skipped": []}
            yield task
            continue

        # Build the mozinfo dict the loader uses to evaluate manifest
        # skip-if / run-if annotations for this task's configuration.
        mozinfo = guess_mozinfo_from_task(
            task,
            config.params.get("head_repository", ""),
            config.params.get("app_version", ""),
            get_test_tags(config, task.get("worker", {}).get("env", {})),
        )

        loader_name = task.pop(
            "test-manifest-loader", config.params["test_manifest_loader"]
        )
        loader = get_manifest_loader(loader_name, config.params)

        task["test-manifests"] = loader.get_manifests(
            task["suite"],
            frozenset(mozinfo.items()),
        )

        # When scheduling with test paths, we often find manifests scheduled but all tests
        # are skipped on a given config.  This will remove the task from the task set if
        # no manifests have active tests for the given task/config
        mh_test_paths = {}
        if "MOZHARNESS_TEST_PATHS" in config.params.get("try_task_config", {}).get(
            "env", {}
        ):
            # MOZHARNESS_TEST_PATHS is a JSON-encoded mapping; presumably
            # suite-name -> list of requested paths (see usage below).
            mh_test_paths = json.loads(
                config.params["try_task_config"]["env"]["MOZHARNESS_TEST_PATHS"]
            )

        if (
            mh_test_paths
            and task["attributes"]["unittest_suite"] in mh_test_paths.keys()
        ):
            input_paths = mh_test_paths[task["attributes"]["unittest_suite"]]
            remaining_manifests = []

            # if we have web-platform tests incoming, just yield task
            found_wpt = False
            for m in input_paths:
                if m.startswith("testing/web-platform/tests/"):
                    found_subsuite = [
                        key for key in WPT_SUBSUITES if key in task["test-name"]
                    ]
                    if found_subsuite:
                        # NOTE(review): when the task names a wpt subsuite but
                        # this path doesn't match it, the task is NOT yielded —
                        # the break below drops it. Looks intentional; confirm.
                        if any(
                            test_subsuite in m
                            for test_subsuite in WPT_SUBSUITES[found_subsuite[0]]
                        ):
                            yield task
                    else:
                        if not isinstance(loader, DefaultLoader):
                            task["chunks"] = "dynamic"
                        yield task
                    found_wpt = True
                    break
            if found_wpt:
                continue

            # input paths can exist in other directories (i.e. [../../dir/test.js])
            # we need to look for all [active] manifests that include tests in the path
            for m in input_paths:
                if [tm for tm in task["test-manifests"]["active"] if tm.startswith(m)]:
                    remaining_manifests.append(m)

            # look in the 'other' manifests
            for m in input_paths:
                man = m
                for tm in task["test-manifests"]["other_dirs"]:
                    # Directories referenced by manifest 'tm' that fall under
                    # the requested input path.
                    matched_dirs = [
                        dp
                        for dp in task["test-manifests"]["other_dirs"].get(tm)
                        if dp.startswith(man)
                    ]
                    if matched_dirs:
                        if tm not in task["test-manifests"]["active"]:
                            continue
                        if m not in remaining_manifests:
                            remaining_manifests.append(m)

            # Nothing requested maps to an active manifest: drop the task.
            if remaining_manifests == []:
                continue

        elif mh_test_paths:
            # we have test paths and they are not related to the test suite
            # this could be the test suite doesn't support test paths
            continue
        elif (
            get_test_tags(config, task.get("worker", {}).get("env", {}))
            and not task["test-manifests"]["active"]
            and not task["test-manifests"]["other_dirs"]
        ):
            # no MH_TEST_PATHS, but MH_TEST_TAG or other filters
            continue

        # The default loader loads all manifests. If we use a non-default
        # loader, we'll only run some subset of manifests and the hardcoded
        # chunk numbers will no longer be valid. Dynamic chunking should yield
        # better results.
        if not isinstance(loader, DefaultLoader):
            task["chunks"] = "dynamic"

        yield task
    194 
    195 
    196 @transforms.add
    197 def resolve_dynamic_chunks(config, tasks):
    198    """Determine how many chunks are needed to handle the given set of manifests."""
    199 
    200    for task in tasks:
    201        if task["chunks"] != "dynamic":
    202            yield task
    203            continue
    204 
    205        if not task.get("test-manifests"):
    206            raise Exception(
    207                "{} must define 'test-manifests' to use dynamic chunking!".format(
    208                    task["test-name"]
    209                )
    210            )
    211 
    212        runtimes = {
    213            m: r
    214            for m, r in get_runtimes(task["test-platform"], task["suite"]).items()
    215            if m in task["test-manifests"]["active"]
    216        }
    217 
    218        # Truncate runtimes that are above the desired chunk duration. They
    219        # will be assigned to a chunk on their own and the excess duration
    220        # shouldn't cause additional chunks to be needed.
    221        times = [min(DYNAMIC_CHUNK_DURATION, r) for r in runtimes.values()]
    222        avg = round(sum(times) / len(times), 2) if times else 0
    223        total = sum(times)
    224 
    225        # If there are manifests missing from the runtimes data, fill them in
    226        # with the average of all present manifests.
    227        missing = [m for m in task["test-manifests"]["active"] if m not in runtimes]
    228        total += avg * len(missing)
    229 
    230        # Apply any chunk multipliers if found.
    231        key = "{}-{}".format(task["test-platform"], task["test-name"])
    232        matches = keymatch(DYNAMIC_CHUNK_MULTIPLIER, key)
    233        if len(matches) > 1:
    234            raise Exception(
    235                f"Multiple matching values for {key} found while "
    236                "determining dynamic chunk multiplier!"
    237            )
    238        elif matches:
    239            total = total * matches[0]
    240 
    241        chunks = int(round(total / DYNAMIC_CHUNK_DURATION))
    242 
    243        # Make sure we never exceed the number of manifests, nor have a chunk
    244        # length of 0.
    245        task["chunks"] = min(chunks, len(task["test-manifests"]["active"])) or 1
    246        yield task
    247 
    248 
    249 @transforms.add
def split_chunks(config, tasks):
    """Based on the 'chunks' key, split tests up into chunks by duplicating
    them and assigning 'this-chunk' appropriately and updating the treeherder
    symbol.
    """

    for task in tasks:
        # If test-manifests are set, chunk them ahead of time to avoid running
        # the algorithm more than once.
        chunked_manifests = None
        if "test-manifests" in task:
            # TODO: hardcoded to "2", ideally this should be centralized somewhere
            # Under try's 'new-test-config' mode, double both the chunk count
            # and the per-chunk time budget for multi-chunk tasks.
            if (
                config.params["try_task_config"].get("new-test-config", False)
                and task["chunks"] > 1
            ):
                task["chunks"] *= 2
                task["max-run-time"] = int(task["max-run-time"] * 2)

            manifests = task["test-manifests"]
            # Partition the active manifests into task["chunks"] groups.
            chunked_manifests = chunk_manifests(
                task["suite"],
                task["test-platform"],
                task["chunks"],
                manifests["active"],
            )

            # Add all skipped manifests to the first chunk of backstop pushes
            # so they still show up in the logs. They won't impact runtime much
            # and this way tools like ActiveData are still aware that they
            # exist.
            if (
                config.params["backstop"]
                and manifests["active"]
                and "skipped" in manifests
            ):
                chunked_manifests[0].extend([
                    m for m in manifests["skipped"] if not m.endswith(".list")
                ])
        last_chunk = task["chunks"]
        for i in range(task["chunks"]):
            this_chunk = i + 1

            # copy the test and update with the chunk number
            # (the final chunk reuses the original dict, saving one deepcopy)
            chunked = deepcopy(task) if this_chunk != last_chunk else task
            chunked["this-chunk"] = this_chunk

            if chunked_manifests is not None:
                chunked["test-manifests"] = sorted(chunked_manifests[i])

            group, symbol = split_symbol(chunked["treeherder-symbol"])
            if task["chunks"] > 1 or not symbol:
                # add the chunk number to the TH symbol
                symbol += str(this_chunk)
                chunked["treeherder-symbol"] = join_symbol(group, symbol)

            yield chunked