backfill.py (16417B)
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.


import logging
import re
import sys
from functools import partial

from taskgraph.util import json
from taskgraph.util.taskcluster import get_task_definition

from .registry import register_callback_action
from .util import (
    combine_task_graph_files,
    create_tasks,
    fetch_graph_and_labels,
    get_decision_task_id,
    get_pushes,
    get_pushes_from_params_input,
    trigger_action,
)

logger = logging.getLogger(__name__)
# Matches a job symbol that already ends with "-<11 char revision>-bk".
SYMBOL_REGEX = re.compile(r"^(.*)-[a-z0-9]{11}-bk$")
# Matches a group symbol that already ends with "-bk".
GROUP_SYMBOL_REGEX = re.compile(r"^(.*)-bk$")

# Allowed browser applications for performance test backfills.
# A browsertime task is only backfilled when its label / try name contains
# one of these application names (Firefox, Geckoview or Fenix).
ALLOWED_PERFTEST_BACKFILL_APPS = (
    "firefox",
    "geckoview",
    "fenix",
)


def input_for_support_action(revision, task, times=1, retrigger=True):
    """Generate input for action to be scheduled.

    Define what label to schedule with 'label'.
    If it is a test task that uses explicit manifests add that information.

    :param revision: revision stored under the "revision" key of the input.
    :param task: task definition; "metadata.name", "extra.treeherder.symbol"
        and "payload" are read from it.
    :param times: value stored under the "times" key.
    :param retrigger: value stored under the "retrigger" key.
    :return: dict to be passed as the input of the "backfill-task" action.
    """
    action_input = {
        "label": task["metadata"]["name"],
        "revision": revision,
        "times": times,
        # We want the backfilled tasks to share the same symbol as the originating task
        "symbol": task["extra"]["treeherder"]["symbol"],
        "retrigger": retrigger,
    }

    # Support tasks that are using manifest based scheduling
    test_paths = task["payload"].get("env", {}).get("MOZHARNESS_TEST_PATHS")
    if test_paths:
        action_input["test_manifests"] = json.loads(test_paths)

    return action_input


def _trigger_action_on_pushes(project, pushes, action_name, **action_kwargs):
    """Trigger ``action_name`` against the decision task of each push.

    A push whose decision task cannot be looked up is skipped with a warning.
    A failure to trigger the action is logged and the remaining pushes are
    still attempted.

    :param project: project passed to ``get_decision_task_id``.
    :param pushes: iterable of push ids.
    :param action_name: name of the action to trigger.
    :param action_kwargs: extra keyword arguments forwarded to
        ``trigger_action`` (e.g. ``input``).
    :return: True if every attempted trigger succeeded, False otherwise.
    """
    all_triggered = True
    for push_id in pushes:
        try:
            # The Gecko decision task can sometimes fail on a push and we need to handle
            # the exception that this call will produce
            push_decision_task_id = get_decision_task_id(project, push_id)
        except Exception:
            logger.warning(f"Could not find decision task for push {push_id}")
            # The decision task may have failed, this is common enough that we
            # don't want to report an error for it.
            continue

        try:
            trigger_action(
                action_name=action_name,
                # This lets the action know on which push we want to add a new task
                decision_task_id=push_decision_task_id,
                **action_kwargs,
            )
        except Exception:
            logger.exception(f"Failed to trigger action for {push_id}")
            all_triggered = False

    return all_triggered


@register_callback_action(
    title="Backfill",
    name="backfill",
    permission="backfill",
    symbol="Bk",
    description=("Given a task schedule it on previous pushes in the same project."),
    order=200,
    context=[{}],  # This will be available for all tasks
    schema={
        "type": "object",
        "properties": {
            "depth": {
                "type": "integer",
                "default": 19,
                "minimum": 1,
                "maximum": 25,
                "title": "Depth",
                "description": (
                    "The number of previous pushes before the current "
                    "push to attempt to trigger this task on."
                ),
            },
            "inclusive": {
                "type": "boolean",
                "default": False,
                "title": "Inclusive Range",
                "description": (
                    "If true, the backfill will also retrigger the task "
                    "on the selected push."
                ),
            },
            "times": {
                "type": "integer",
                "default": 1,
                "minimum": 1,
                "maximum": 10,
                "title": "Times",
                "description": (
                    "The number of times to execute each job you are backfilling."
                ),
            },
            "retrigger": {
                "type": "boolean",
                "default": True,
                "title": "Retrigger",
                "description": (
                    "If False, the task won't retrigger on pushes that have already "
                    "ran it."
                ),
            },
        },
        "additionalProperties": False,
    },
    available=lambda parameters: True,
)
def backfill_action(parameters, graph_config, input, task_group_id, task_id):
    """
    This action takes a task ID and schedules it on previous pushes (via support action).

    Browsertime tasks whose label contains none of ALLOWED_PERFTEST_BACKFILL_APPS
    are skipped with a warning. The process exits with status 1 if triggering the
    "backfill-task" action failed for at least one push.

    To execute this action locally follow the documentation here:
    https://taskcluster-taskgraph.readthedocs.io/en/latest/howto/create-actions.html#testing-actions
    """
    task = get_task_definition(task_id)

    # Only backfill allowed browser applications for performance tests
    task_label = task.get("metadata", {}).get("name", "")
    is_browsertime = "browsertime" in task_label

    if is_browsertime and not any(
        app in task_label for app in ALLOWED_PERFTEST_BACKFILL_APPS
    ):
        logger.warning(
            f"Skipping backfill for non-allowed raptor-browsertime task: {task_label}"
        )
        return

    pushes = get_pushes_from_params_input(parameters, input)
    input_for_action = input_for_support_action(
        revision=parameters["head_rev"],
        task=task,
        times=input.get("times", 1),
        retrigger=input.get("retrigger", True),
    )

    if not _trigger_action_on_pushes(
        parameters["project"], pushes, "backfill-task", input=input_for_action
    ):
        sys.exit(1)


def add_backfill_suffix(regex, symbol, suffix):
    """Return ``symbol`` with ``suffix`` appended, unless ``regex`` already matches it."""
    if regex.match(symbol) is None:
        return symbol + suffix
    return symbol


def backfill_modifier(task, input):
    """Adjust the task whose label equals ``input["label"]`` for backfilling.

    Any other task is returned untouched. For the matching task:

    * ``task_duplicates`` is set when ``input["times"]`` is greater than 1;
    * when ``input["test_manifests"]`` is non-empty, the manifests are stored in
      the task attributes and in the ``MOZHARNESS_TEST_PATHS`` environment
      variable, the Treeherder symbols get a backfill suffix and the task is
      tagged with the "backfill-task" action.

    :return: the (possibly modified) task.
    """
    if task.label != input["label"]:
        return task

    logger.debug(f"Modifying test_manifests for {task.label}")
    times = input.get("times", 1)

    # Set task duplicates based on 'times' value.
    if times > 1:
        task.attributes["task_duplicates"] = times

    # If the original task has defined test paths
    test_manifests = input.get("test_manifests")
    if test_manifests:
        revision = input.get("revision")

        task.attributes["test_manifests"] = test_manifests
        task.task["payload"]["env"]["MOZHARNESS_TEST_PATHS"] = json.dumps(
            test_manifests
        )
        # The name/label might have been modified in new_label, thus, change it here as well
        task.task["metadata"]["name"] = task.label
        th_info = task.task["extra"]["treeherder"]
        # Use a job symbol of the originating task as defined in the backfill action
        th_info["symbol"] = add_backfill_suffix(
            SYMBOL_REGEX, th_info["symbol"], f"-{revision[0:11]}-bk"
        )
        if th_info.get("groupSymbol"):
            # Group all backfilled tasks together
            th_info["groupSymbol"] = add_backfill_suffix(
                GROUP_SYMBOL_REGEX, th_info["groupSymbol"], "-bk"
            )
        task.task["tags"]["action"] = "backfill-task"
    return task


def do_not_modify(task):
    """Modifier that returns ``task`` unchanged."""
    return task


def new_label(label, tasks):
    """This is to handle the case when a previous push does not contain a specific task label
    and we try to find a label we can reuse.

    For instance, we try to backfill chunk #3, however, a previous push does not contain such
    chunk, thus, we try to reuse another task/label.

    :param label: label that is missing from ``tasks``.
    :param tasks: container of the labels available in the task graph.
    :return: ``label`` without its trailing "-<number>" if present in ``tasks``,
        otherwise the same prefix followed by "-1".
    :raises Exception: if ``label`` has no "-", does not end with a number, or
        no replacement label is present in ``tasks``.
    """
    logger.info(f"Extracting new label for {label}")

    if "-" not in label:
        raise Exception(
            f"Expected '-' was not found in label {label}, cannot extract new label."
        )

    prefix, ending = label.rsplit("-", 1)
    if not ending.isdigit():
        raise Exception(f"{label} was not found in the task-graph")

    # We assume that the taskgraph has chunk #1 OR unnumbered chunk and we hijack it
    for candidate in (prefix, prefix + "-1"):
        if candidate in tasks:
            return candidate
    raise Exception(f"New label ({label}) was not found in the task-graph")


@register_callback_action(
    name="backfill-task",
    title="Backfill task on a push.",
    permission="backfill",
    symbol="backfill-task",
    description="This action is normally scheduled by the backfill action. "
    "The intent is to schedule a task on previous pushes.",
    order=500,
    context=[],
    schema={
        "type": "object",
        "properties": {
            "label": {"type": "string", "description": "A task label"},
            "revision": {
                "type": "string",
                "description": "Revision of the original push from where we backfill.",
            },
            "symbol": {
                "type": "string",
                "description": "Symbol to be used by the scheduled task.",
            },
            "test_manifests": {
                "type": "array",
                "default": [],
                "description": "An array of test manifest paths",
                "items": {"type": "string"},
            },
            "times": {
                "type": "integer",
                "default": 1,
                "minimum": 1,
                "maximum": 10,
                "title": "Times",
                "description": (
                    "The number of times to execute each job you are backfilling."
                ),
            },
            "retrigger": {
                "type": "boolean",
                "default": True,
                "title": "Retrigger",
                "description": (
                    "If False, the task won't retrigger on pushes that have already "
                    "ran it."
                ),
            },
        },
    },
)
def add_task_with_original_manifests(
    parameters, graph_config, input, task_group_id, task_id
):
    """
    This action is normally scheduled by the backfill action. The intent is to schedule a test
    task with the test manifests from the original task (if available).

    The push in which we want to schedule a new task is defined by the parameters object.

    When "retrigger" is False and the push already has a task for the requested label,
    nothing is scheduled. When the label is missing from the full task graph, a
    replacement label is looked up with new_label().

    To execute this action locally follow the documentation here:
    https://taskcluster-taskgraph.readthedocs.io/en/latest/howto/create-actions.html#testing-actions
    """
    # This step takes a lot of time when executed locally
    logger.info("Retreving the full task graph and labels.")
    decision_task_id, full_task_graph, label_to_taskid, _ = fetch_graph_and_labels(
        parameters, graph_config
    )

    label = input.get("label")
    # "retrigger" defaults to True, as declared in the schema and as used by
    # backfill_action; a missing value must not be treated as False.
    if not input.get("retrigger", True) and label in label_to_taskid:
        logger.info(
            f"Skipping push with decision task ID {decision_task_id} as it already has this test."
        )
        return

    if label not in full_task_graph.tasks:
        label = new_label(label, full_task_graph.tasks)

    to_run = [label]

    # backfill_modifier only modifies the task whose label equals input["label"].
    # Give it a copy of the input carrying the label that is actually scheduled,
    # so that a task reused through new_label() is still modified.
    modifier_input = dict(input, label=label)

    logger.info("Creating tasks...")
    create_tasks(
        graph_config,
        to_run,
        full_task_graph,
        label_to_taskid,
        parameters,
        decision_task_id,
        suffix="0",
        modifier=partial(backfill_modifier, input=modifier_input),
    )

    # TODO Implement a way to write out artifacts without assuming there's
    # multiple sets of them so we can stop passing in "suffix".
    combine_task_graph_files(["0"])


@register_callback_action(
    title="Backfill all browsertime",
    name="backfill-all-browsertime",
    permission="backfill",
    symbol="baB",
    description=(
        "Schedule all browsertime tests for the current and previous push in the same project."
    ),
    order=800,
    context=[],  # This will be available for all tasks
    available=lambda parameters: True,
)
def backfill_all_browsertime(parameters, graph_config, input, task_group_id, task_id):
    """
    This action takes a revision and schedules it on previous pushes (via support action).

    The "add-all-browsertime" action is triggered on each push returned by
    get_pushes() for a depth of 2 ending at the current pushlog id. Every push is
    attempted; the process exits with status 1 if triggering failed for any of them.

    To execute this action locally follow the documentation here:
    https://taskcluster-taskgraph.readthedocs.io/en/latest/howto/create-actions.html#testing-actions
    """
    pushes = get_pushes(
        project=parameters["head_repository"],
        end_id=int(parameters["pushlog_id"]),
        depth=2,
    )

    if not _trigger_action_on_pushes(
        parameters["project"], pushes, "add-all-browsertime"
    ):
        sys.exit(1)


def filter_raptor_jobs(full_task_graph, label_to_taskid, project):
    """Select the raptor browsertime test labels to schedule on a push.

    :param full_task_graph: graph whose ``tasks`` mapping (label -> task) is scanned.
    :param label_to_taskid: labels that already have a task; those are not returned.
    :param project: project name matched against each task's ``run_on_projects``.
    :return: list of labels to schedule, each label at most once.
    """
    # Late import to prevent impacting other backfill action tasks
    from ..util.attributes import match_run_on_projects

    exceptions = ("live", "profiling", "youtube-playback")

    to_run = []
    for label, entry in full_task_graph.tasks.items():
        if entry.kind != "test":
            continue
        if entry.task.get("extra", {}).get("suite", "") != "raptor":
            continue
        if not match_run_on_projects(
            {"project": project}, entry.attributes.get("run_on_projects", [])
        ):
            continue

        test_platform = entry.attributes.get("test_platform", "")
        raptor_try_name = entry.attributes.get("raptor_try_name", "")

        if "macosx1500" in test_platform:
            continue
        if "browsertime" not in raptor_try_name:
            continue
        if "shippable" not in test_platform:
            continue
        if "android" in test_platform:
            # Bug 1786254 - The backfill bot is scheduling too many tests atm
            continue
        if any(e in raptor_try_name for e in exceptions):
            continue
        # Only run on allowed browser applications.
        if not any(app in raptor_try_name for app in ALLOWED_PERFTEST_BACKFILL_APPS):
            continue

        # Firefox browsertime tests are limited to the 64-bit shippable opt platforms
        is_firefox_test = "firefox" in raptor_try_name and test_platform.endswith(
            "64-shippable-qr/opt"
        )
        # Geckoview pageload tests
        is_geckoview_test = "geckoview" in raptor_try_name

        # A single combined check so a label matching both cases is added only once.
        if (is_firefox_test or is_geckoview_test) and label not in label_to_taskid:
            to_run.append(label)
    return to_run


@register_callback_action(
    name="add-all-browsertime",
    title="Add All Browsertime Tests.",
    permission="backfill",
    symbol="aaB",
    description="This action is normally scheduled by the backfill-all-browsertime action. "
    "The intent is to schedule all browsertime tests on a specific pushe.",
    order=900,
    context=[],
)
def add_all_browsertime(parameters, graph_config, input, task_group_id, task_id):
    """
    This action is normally scheduled by the backfill-all-browsertime action. The intent is to
    trigger all browsertime tasks for the current revision.

    The push in which we want to schedule a new task is defined by the parameters object.
    The labels to schedule are selected by filter_raptor_jobs().

    To execute this action locally follow the documentation here:
    https://taskcluster-taskgraph.readthedocs.io/en/latest/howto/create-actions.html#testing-actions
    """
    logger.info("Retreving the full task graph and labels.")
    decision_task_id, full_task_graph, label_to_taskid, _ = fetch_graph_and_labels(
        parameters, graph_config
    )

    to_run = filter_raptor_jobs(full_task_graph, label_to_taskid, parameters["project"])

    create_tasks(
        graph_config,
        to_run,
        full_task_graph,
        label_to_taskid,
        parameters,
        decision_task_id,
    )
    logger.info(f"Scheduled {len(to_run)} raptor tasks (time 1)")