chunk.py (11911B)
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""Transforms that decide how many chunks each test task gets and then split
each task into per-chunk copies.

Chunk counts may be hardcoded in the task definition, computed per changed
file (test-verify / test-coverage), or set to the string "dynamic", in which
case the number of chunks is derived from recorded per-manifest runtimes.
"""

import taskgraph
from taskgraph.transforms.base import TransformSequence
from taskgraph.util import json
from taskgraph.util.attributes import keymatch
from taskgraph.util.copy import deepcopy
from taskgraph.util.treeherder import join_symbol, split_symbol

from gecko_taskgraph.util.attributes import is_try
from gecko_taskgraph.util.chunking import (
    WPT_SUBSUITES,
    DefaultLoader,
    chunk_manifests,
    get_manifest_loader,
    get_runtimes,
    get_test_tags,
    guess_mozinfo_from_task,
)
from gecko_taskgraph.util.perfile import perfile_number_of_chunks

DYNAMIC_CHUNK_DURATION = 20 * 60  # seconds
"""The approximate time each test chunk should take to run."""


DYNAMIC_CHUNK_MULTIPLIER = {
    # Desktop xpcshell tests run in parallel. Reduce the total runtime to
    # compensate.
    "^(?!android).*-xpcshell.*": 0.2,
}
"""A multiplication factor to tweak the total duration per platform / suite."""

transforms = TransformSequence()


@transforms.add
def set_test_verify_chunks(config, tasks):
    """Set the number of chunks we use for test-verify.

    Only applies to tasks whose suite starts with "test-verify" or
    "test-coverage"; every other task is yielded unchanged.

    Args:
        config: the transform config (provides ``params``).
        tasks: iterable of task definition dicts.

    Yields:
        task dicts, with ``task["chunks"]`` set for verify/coverage suites.
    """
    for task in tasks:
        if any(task["suite"].startswith(s) for s in ("test-verify", "test-coverage")):
            # NOTE(review): MOZHARNESS_TEST_PATHS is read here from
            # try_task_config["templates"]["env"], whereas set_test_manifests
            # below reads it from try_task_config["env"] — confirm which
            # layout is current for try_task_config.
            env = config.params.get("try_task_config", {}) or {}
            env = env.get("templates", {}).get("env", {})
            task["chunks"] = perfile_number_of_chunks(
                is_try(config.params),
                env.get("MOZHARNESS_TEST_PATHS", ""),
                frozenset(config.params["files_changed"]),
                task["test-name"],
            )

            # limit the number of chunks we run for test-verify mode because
            # test-verify is comprehensive and takes a lot of time, if we have
            # >30 tests changed, this is probably an import of external tests,
            # or a patch renaming/moving files in bulk
            maximum_number_verify_chunks = 3
            task["chunks"] = min(task["chunks"], maximum_number_verify_chunks)

        yield task


@transforms.add
def set_test_manifests(config, tasks):
    """Determine the set of test manifests that should run in this task.

    Populates ``task["test-manifests"]`` (a dict with at least "active" and
    "skipped" keys) via the configured manifest loader. Tasks may be dropped
    entirely (not yielded) when MOZHARNESS_TEST_PATHS or test-tag filters
    leave them with nothing to run.

    Args:
        config: the transform config (provides ``params``).
        tasks: iterable of task definition dicts.

    Yields:
        task dicts that still have work to do, with manifests resolved.
    """

    for task in tasks:
        # When a task explicitly requests no 'test_manifest_loader', test
        # resolving will happen at test runtime rather than in the taskgraph.
        if "test-manifest-loader" in task and task["test-manifest-loader"] is None:
            yield task
            continue

        # Set 'tests_grouped' to "1", so we can differentiate between suites that are
        # chunked at the test runtime and those that are chunked in the taskgraph.
        task.setdefault("tags", {})["tests_grouped"] = "1"

        if taskgraph.fast:
            # We want to avoid evaluating manifests when taskgraph.fast is set. But
            # manifests are required for dynamic chunking. Just set the number of
            # chunks to one in this case.
            if task["chunks"] == "dynamic":
                task["chunks"] = 1
            yield task
            continue

        manifests = task.get("test-manifests")
        if manifests:
            # Manifests were provided up-front; normalize a bare list into the
            # {"active": ..., "skipped": ...} shape the rest of the transforms
            # expect, then skip loader resolution entirely.
            if isinstance(manifests, list):
                task["test-manifests"] = {"active": manifests, "skipped": []}
            yield task
            continue

        mozinfo = guess_mozinfo_from_task(
            task,
            config.params.get("head_repository", ""),
            config.params.get("app_version", ""),
            get_test_tags(config, task.get("worker", {}).get("env", {})),
        )

        loader_name = task.pop(
            "test-manifest-loader", config.params["test_manifest_loader"]
        )
        loader = get_manifest_loader(loader_name, config.params)

        task["test-manifests"] = loader.get_manifests(
            task["suite"],
            frozenset(mozinfo.items()),
        )

        # When scheduling with test paths, we often find manifests scheduled but all tests
        # are skipped on a given config. This will remove the task from the task set if
        # no manifests have active tests for the given task/config
        mh_test_paths = {}
        if "MOZHARNESS_TEST_PATHS" in config.params.get("try_task_config", {}).get(
            "env", {}
        ):
            # MOZHARNESS_TEST_PATHS is a JSON-encoded mapping; presumably
            # suite name -> list of requested test paths (see usage below).
            mh_test_paths = json.loads(
                config.params["try_task_config"]["env"]["MOZHARNESS_TEST_PATHS"]
            )

        if (
            mh_test_paths
            and task["attributes"]["unittest_suite"] in mh_test_paths.keys()
        ):
            input_paths = mh_test_paths[task["attributes"]["unittest_suite"]]
            remaining_manifests = []

            # if we have web-platform tests incoming, just yield task
            found_wpt = False
            for m in input_paths:
                if m.startswith("testing/web-platform/tests/"):
                    # If the task targets a WPT subsuite, only yield it when
                    # the requested path belongs to that subsuite; a plain
                    # (non-subsuite) WPT task is always yielded.
                    found_subsuite = [
                        key for key in WPT_SUBSUITES if key in task["test-name"]
                    ]
                    if found_subsuite:
                        if any(
                            test_subsuite in m
                            for test_subsuite in WPT_SUBSUITES[found_subsuite[0]]
                        ):
                            yield task
                    else:
                        if not isinstance(loader, DefaultLoader):
                            task["chunks"] = "dynamic"
                        yield task
                    # First WPT path decides the outcome; stop scanning paths.
                    found_wpt = True
                    break
            if found_wpt:
                # WPT handling above already yielded (or dropped) this task.
                continue

            # input paths can exist in other directories (i.e. [../../dir/test.js])
            # we need to look for all [active] manifests that include tests in the path
            for m in input_paths:
                if [tm for tm in task["test-manifests"]["active"] if tm.startswith(m)]:
                    remaining_manifests.append(m)

            # look in the 'other' manifests
            for m in input_paths:
                man = m
                for tm in task["test-manifests"]["other_dirs"]:
                    # Keep path m if any directory recorded under this
                    # manifest falls inside it — but only for manifests that
                    # are also active on this config.
                    matched_dirs = [
                        dp
                        for dp in task["test-manifests"]["other_dirs"].get(tm)
                        if dp.startswith(man)
                    ]
                    if matched_dirs:
                        if tm not in task["test-manifests"]["active"]:
                            continue
                        if m not in remaining_manifests:
                            remaining_manifests.append(m)

            if remaining_manifests == []:
                # None of the requested paths map to active manifests on this
                # config: drop the task.
                continue

        elif mh_test_paths:
            # we have test paths and they are not related to the test suite
            # this could be the test suite doesn't support test paths
            continue
        elif (
            get_test_tags(config, task.get("worker", {}).get("env", {}))
            and not task["test-manifests"]["active"]
            and not task["test-manifests"]["other_dirs"]
        ):
            # no MH_TEST_PATHS, but MH_TEST_TAG or other filters
            continue

        # The default loader loads all manifests. If we use a non-default
        # loader, we'll only run some subset of manifests and the hardcoded
        # chunk numbers will no longer be valid. Dynamic chunking should yield
        # better results.
        if not isinstance(loader, DefaultLoader):
            task["chunks"] = "dynamic"

        yield task


@transforms.add
def resolve_dynamic_chunks(config, tasks):
    """Determine how many chunks are needed to handle the given set of manifests.

    Replaces ``task["chunks"] == "dynamic"`` with a concrete integer derived
    from recorded per-manifest runtimes, targeting DYNAMIC_CHUNK_DURATION
    seconds per chunk. Tasks with a concrete chunk count pass through
    unchanged.

    Raises:
        Exception: if a task requests dynamic chunking without having
            'test-manifests' resolved.
    """

    for task in tasks:
        if task["chunks"] != "dynamic":
            yield task
            continue

        if not task.get("test-manifests"):
            raise Exception(
                "{} must define 'test-manifests' to use dynamic chunking!".format(
                    task["test-name"]
                )
            )

        # Runtime data restricted to the manifests active on this task.
        runtimes = {
            m: r
            for m, r in get_runtimes(task["test-platform"], task["suite"]).items()
            if m in task["test-manifests"]["active"]
        }

        # Truncate runtimes that are above the desired chunk duration. They
        # will be assigned to a chunk on their own and the excess duration
        # shouldn't cause additional chunks to be needed.
        times = [min(DYNAMIC_CHUNK_DURATION, r) for r in runtimes.values()]
        avg = round(sum(times) / len(times), 2) if times else 0
        total = sum(times)

        # If there are manifests missing from the runtimes data, fill them in
        # with the average of all present manifests.
        missing = [m for m in task["test-manifests"]["active"] if m not in runtimes]
        total += avg * len(missing)

        # Apply any chunk multipliers if found.
        key = "{}-{}".format(task["test-platform"], task["test-name"])
        matches = keymatch(DYNAMIC_CHUNK_MULTIPLIER, key)
        if len(matches) > 1:
            raise Exception(
                f"Multiple matching values for {key} found while "
                "determining dynamic chunk multiplier!"
            )
        elif matches:
            total = total * matches[0]

        chunks = int(round(total / DYNAMIC_CHUNK_DURATION))

        # Make sure we never exceed the number of manifests, nor have a chunk
        # length of 0.
        task["chunks"] = min(chunks, len(task["test-manifests"]["active"])) or 1
        yield task


@transforms.add
def split_chunks(config, tasks):
    """Based on the 'chunks' key, split tests up into chunks by duplicating
    them and assigning 'this-chunk' appropriately and updating the treeherder
    symbol.

    Each input task is yielded ``task["chunks"]`` times; when manifests are
    present they are partitioned across the chunks via chunk_manifests().
    """

    for task in tasks:
        # If test-manifests are set, chunk them ahead of time to avoid running
        # the algorithm more than once.
        chunked_manifests = None
        if "test-manifests" in task:
            # TODO: hardcoded to "2", ideally this should be centralized somewhere
            if (
                config.params["try_task_config"].get("new-test-config", False)
                and task["chunks"] > 1
            ):
                task["chunks"] *= 2
                task["max-run-time"] = int(task["max-run-time"] * 2)

            manifests = task["test-manifests"]
            chunked_manifests = chunk_manifests(
                task["suite"],
                task["test-platform"],
                task["chunks"],
                manifests["active"],
            )

            # Add all skipped manifests to the first chunk of backstop pushes
            # so they still show up in the logs. They won't impact runtime much
            # and this way tools like ActiveData are still aware that they
            # exist.
            if (
                config.params["backstop"]
                and manifests["active"]
                and "skipped" in manifests
            ):
                chunked_manifests[0].extend(
                    [m for m in manifests["skipped"] if not m.endswith(".list")]
                )

        last_chunk = task["chunks"]
        for i in range(task["chunks"]):
            this_chunk = i + 1

            # copy the test and update with the chunk number
            # (the final chunk reuses the original dict instead of copying it)
            chunked = deepcopy(task) if this_chunk != last_chunk else task
            chunked["this-chunk"] = this_chunk

            if chunked_manifests is not None:
                chunked["test-manifests"] = sorted(chunked_manifests[i])

            group, symbol = split_symbol(chunked["treeherder-symbol"])
            if task["chunks"] > 1 or not symbol:
                # add the chunk number to the TH symbol
                symbol += str(this_chunk)
            chunked["treeherder-symbol"] = join_symbol(group, symbol)

            yield chunked