tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

backfill.py (16417B)


      1 # This Source Code Form is subject to the terms of the Mozilla Public
      2 # License, v. 2.0. If a copy of the MPL was not distributed with this
      3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      4 
      5 
      6 import logging
      7 import re
      8 import sys
      9 from functools import partial
     10 
     11 from taskgraph.util import json
     12 from taskgraph.util.taskcluster import get_task_definition
     13 
     14 from .registry import register_callback_action
     15 from .util import (
     16    combine_task_graph_files,
     17    create_tasks,
     18    fetch_graph_and_labels,
     19    get_decision_task_id,
     20    get_pushes,
     21    get_pushes_from_params_input,
     22    trigger_action,
     23 )
     24 
     25 logger = logging.getLogger(__name__)
     26 SYMBOL_REGEX = re.compile("^(.*)-[a-z0-9]{11}-bk$")
     27 GROUP_SYMBOL_REGEX = re.compile("^(.*)-bk$")
     28 
     29 # Allowed browser applications for performance test backfills
     30 # Only Firefox and Geckoview should be backfilled for regression detection
     31 ALLOWED_PERFTEST_BACKFILL_APPS = (
     32    "firefox",
     33    "geckoview",
     34    "fenix",
     35 )
     36 
     37 
     38 def input_for_support_action(revision, task, times=1, retrigger=True):
     39    """Generate input for action to be scheduled.
     40 
     41    Define what label to schedule with 'label'.
     42    If it is a test task that uses explicit manifests add that information.
     43    """
     44    input = {
     45        "label": task["metadata"]["name"],
     46        "revision": revision,
     47        "times": times,
     48        # We want the backfilled tasks to share the same symbol as the originating task
     49        "symbol": task["extra"]["treeherder"]["symbol"],
     50        "retrigger": retrigger,
     51    }
     52 
     53    # Support tasks that are using manifest based scheduling
     54    if task["payload"].get("env", {}).get("MOZHARNESS_TEST_PATHS"):
     55        input["test_manifests"] = json.loads(
     56            task["payload"]["env"]["MOZHARNESS_TEST_PATHS"]
     57        )
     58 
     59    return input
     60 
     61 
     62 @register_callback_action(
     63    title="Backfill",
     64    name="backfill",
     65    permission="backfill",
     66    symbol="Bk",
     67    description=("Given a task schedule it on previous pushes in the same project."),
     68    order=200,
     69    context=[{}],  # This will be available for all tasks
     70    schema={
     71        "type": "object",
     72        "properties": {
     73            "depth": {
     74                "type": "integer",
     75                "default": 19,
     76                "minimum": 1,
     77                "maximum": 25,
     78                "title": "Depth",
     79                "description": (
     80                    "The number of previous pushes before the current "
     81                    "push to attempt to trigger this task on."
     82                ),
     83            },
     84            "inclusive": {
     85                "type": "boolean",
     86                "default": False,
     87                "title": "Inclusive Range",
     88                "description": (
     89                    "If true, the backfill will also retrigger the task "
     90                    "on the selected push."
     91                ),
     92            },
     93            "times": {
     94                "type": "integer",
     95                "default": 1,
     96                "minimum": 1,
     97                "maximum": 10,
     98                "title": "Times",
     99                "description": (
    100                    "The number of times to execute each job you are backfilling."
    101                ),
    102            },
    103            "retrigger": {
    104                "type": "boolean",
    105                "default": True,
    106                "title": "Retrigger",
    107                "description": (
    108                    "If False, the task won't retrigger on pushes that have already "
    109                    "ran it."
    110                ),
    111            },
    112        },
    113        "additionalProperties": False,
    114    },
    115    available=lambda parameters: True,
    116 )
    117 def backfill_action(parameters, graph_config, input, task_group_id, task_id):
    118    """
    119    This action takes a task ID and schedules it on previous pushes (via support action).
    120 
    121    To execute this action locally follow the documentation here:
    122    https://taskcluster-taskgraph.readthedocs.io/en/latest/howto/create-actions.html#testing-actions
    123    """
    124    task = get_task_definition(task_id)
    125 
    126    # Only backfill allowed browser applications for performance tests
    127    task_label = task.get("metadata", {}).get("name", "")
    128    is_browsertime = "browsertime" in task_label
    129 
    130    if is_browsertime and not any(
    131        app in task_label for app in ALLOWED_PERFTEST_BACKFILL_APPS
    132    ):
    133        logger.warning(
    134            f"Skipping backfill for non-allowed raptor-browsertime task: {task_label}"
    135        )
    136        return
    137 
    138    pushes = get_pushes_from_params_input(parameters, input)
    139    failed = False
    140    input_for_action = input_for_support_action(
    141        revision=parameters["head_rev"],
    142        task=task,
    143        times=input.get("times", 1),
    144        retrigger=input.get("retrigger", True),
    145    )
    146 
    147    for push_id in pushes:
    148        try:
    149            # The Gecko decision task can sometimes fail on a push and we need to handle
    150            # the exception that this call will produce
    151            push_decision_task_id = get_decision_task_id(parameters["project"], push_id)
    152        except Exception:
    153            logger.warning(f"Could not find decision task for push {push_id}")
    154            # The decision task may have failed, this is common enough that we
    155            # don't want to report an error for it.
    156            continue
    157 
    158        try:
    159            trigger_action(
    160                action_name="backfill-task",
    161                # This lets the action know on which push we want to add a new task
    162                decision_task_id=push_decision_task_id,
    163                input=input_for_action,
    164            )
    165        except Exception:
    166            logger.exception(f"Failed to trigger action for {push_id}")
    167            failed = True
    168 
    169    if failed:
    170        sys.exit(1)
    171 
    172 
    173 def add_backfill_suffix(regex, symbol, suffix):
    174    m = regex.match(symbol)
    175    if m is None:
    176        symbol += suffix
    177    return symbol
    178 
    179 
    180 def backfill_modifier(task, input):
    181    if task.label != input["label"]:
    182        return task
    183 
    184    logger.debug(f"Modifying test_manifests for {task.label}")
    185    times = input.get("times", 1)
    186 
    187    # Set task duplicates based on 'times' value.
    188    if times > 1:
    189        task.attributes["task_duplicates"] = times
    190 
    191    # If the original task has defined test paths
    192    test_manifests = input.get("test_manifests")
    193    if test_manifests:
    194        revision = input.get("revision")
    195 
    196        task.attributes["test_manifests"] = test_manifests
    197        task.task["payload"]["env"]["MOZHARNESS_TEST_PATHS"] = json.dumps(
    198            test_manifests
    199        )
    200        # The name/label might have been modify in new_label, thus, change it here as well
    201        task.task["metadata"]["name"] = task.label
    202        th_info = task.task["extra"]["treeherder"]
    203        # Use a job symbol of the originating task as defined in the backfill action
    204        th_info["symbol"] = add_backfill_suffix(
    205            SYMBOL_REGEX, th_info["symbol"], f"-{revision[0:11]}-bk"
    206        )
    207        if th_info.get("groupSymbol"):
    208            # Group all backfilled tasks together
    209            th_info["groupSymbol"] = add_backfill_suffix(
    210                GROUP_SYMBOL_REGEX, th_info["groupSymbol"], "-bk"
    211            )
    212        task.task["tags"]["action"] = "backfill-task"
    213    return task
    214 
    215 
    216 def do_not_modify(task):
    217    return task
    218 
    219 
    220 def new_label(label, tasks):
    221    """This is to handle the case when a previous push does not contain a specific task label
    222    and we try to find a label we can reuse.
    223 
    224    For instance, we try to backfill chunk #3, however, a previous push does not contain such
    225    chunk, thus, we try to reuse another task/label.
    226    """
    227    logger.info(f"Extracting new label for {label}")
    228 
    229    if "-" not in label:
    230        raise Exception(
    231            f"Expected '-' was not found in label {label}, cannot extract new label."
    232        )
    233 
    234    begining_label, ending = label.rsplit("-", 1)
    235 
    236    if ending.isdigit():
    237        # We assume that the taskgraph has chunk #1 OR unnumbered chunk and we hijack it
    238        if begining_label in tasks:
    239            return begining_label
    240        if begining_label + "-1" in tasks:
    241            return begining_label + "-1"
    242        raise Exception(f"New label ({label}) was not found in the task-graph")
    243    else:
    244        raise Exception(f"{label} was not found in the task-graph")
    245 
    246 
    247 @register_callback_action(
    248    name="backfill-task",
    249    title="Backfill task on a push.",
    250    permission="backfill",
    251    symbol="backfill-task",
    252    description="This action is normally scheduled by the backfill action. "
    253    "The intent is to schedule a task on previous pushes.",
    254    order=500,
    255    context=[],
    256    schema={
    257        "type": "object",
    258        "properties": {
    259            "label": {"type": "string", "description": "A task label"},
    260            "revision": {
    261                "type": "string",
    262                "description": "Revision of the original push from where we backfill.",
    263            },
    264            "symbol": {
    265                "type": "string",
    266                "description": "Symbol to be used by the scheduled task.",
    267            },
    268            "test_manifests": {
    269                "type": "array",
    270                "default": [],
    271                "description": "An array of test manifest paths",
    272                "items": {"type": "string"},
    273            },
    274            "times": {
    275                "type": "integer",
    276                "default": 1,
    277                "minimum": 1,
    278                "maximum": 10,
    279                "title": "Times",
    280                "description": (
    281                    "The number of times to execute each job you are backfilling."
    282                ),
    283            },
    284            "retrigger": {
    285                "type": "boolean",
    286                "default": True,
    287                "title": "Retrigger",
    288                "description": (
    289                    "If False, the task won't retrigger on pushes that have already "
    290                    "ran it."
    291                ),
    292            },
    293        },
    294    },
    295 )
    296 def add_task_with_original_manifests(
    297    parameters, graph_config, input, task_group_id, task_id
    298 ):
    299    """
    300    This action is normally scheduled by the backfill action. The intent is to schedule a test
    301    task with the test manifests from the original task (if available).
    302 
    303    The push in which we want to schedule a new task is defined by the parameters object.
    304 
    305    To execute this action locally follow the documentation here:
    306    https://taskcluster-taskgraph.readthedocs.io/en/latest/howto/create-actions.html#testing-actions
    307    """
    308    # This step takes a lot of time when executed locally
    309    logger.info("Retreving the full task graph and labels.")
    310    decision_task_id, full_task_graph, label_to_taskid, _ = fetch_graph_and_labels(
    311        parameters, graph_config
    312    )
    313 
    314    label = input.get("label")
    315    if not input.get("retrigger") and label in label_to_taskid:
    316        logger.info(
    317            f"Skipping push with decision task ID {decision_task_id} as it already has this test."
    318        )
    319        return
    320 
    321    if label not in full_task_graph.tasks:
    322        label = new_label(label, full_task_graph.tasks)
    323 
    324    to_run = [label]
    325 
    326    logger.info("Creating tasks...")
    327    create_tasks(
    328        graph_config,
    329        to_run,
    330        full_task_graph,
    331        label_to_taskid,
    332        parameters,
    333        decision_task_id,
    334        suffix="0",
    335        modifier=partial(backfill_modifier, input=input),
    336    )
    337 
    338    # TODO Implement a way to write out artifacts without assuming there's
    339    # multiple sets of them so we can stop passing in "suffix".
    340    combine_task_graph_files(["0"])
    341 
    342 
    343 @register_callback_action(
    344    title="Backfill all browsertime",
    345    name="backfill-all-browsertime",
    346    permission="backfill",
    347    symbol="baB",
    348    description=(
    349        "Schedule all browsertime tests for the current and previous push in the same project."
    350    ),
    351    order=800,
    352    context=[],  # This will be available for all tasks
    353    available=lambda parameters: True,
    354 )
    355 def backfill_all_browsertime(parameters, graph_config, input, task_group_id, task_id):
    356    """
    357    This action takes a revision and schedules it on previous pushes (via support action).
    358 
    359    To execute this action locally follow the documentation here:
    360    https://taskcluster-taskgraph.readthedocs.io/en/latest/howto/create-actions.html#testing-actions
    361    """
    362    pushes = get_pushes(
    363        project=parameters["head_repository"],
    364        end_id=int(parameters["pushlog_id"]),
    365        depth=2,
    366    )
    367 
    368    for push_id in pushes:
    369        try:
    370            # The Gecko decision task can sometimes fail on a push and we need to handle
    371            # the exception that this call will produce
    372            push_decision_task_id = get_decision_task_id(parameters["project"], push_id)
    373        except Exception:
    374            logger.warning(f"Could not find decision task for push {push_id}")
    375            # The decision task may have failed, this is common enough that we
    376            # don't want to report an error for it.
    377            continue
    378 
    379        try:
    380            trigger_action(
    381                action_name="add-all-browsertime",
    382                # This lets the action know on which push we want to add a new task
    383                decision_task_id=push_decision_task_id,
    384            )
    385        except Exception:
    386            logger.exception(f"Failed to trigger action for {push_id}")
    387            sys.exit(1)
    388 
    389 
    390 def filter_raptor_jobs(full_task_graph, label_to_taskid, project):
    391    # Late import to prevent impacting other backfill action tasks
    392    from ..util.attributes import match_run_on_projects
    393 
    394    to_run = []
    395    for label, entry in full_task_graph.tasks.items():
    396        if entry.kind != "test":
    397            continue
    398        if entry.task.get("extra", {}).get("suite", "") != "raptor":
    399            continue
    400        if not match_run_on_projects(
    401            {"project": project}, entry.attributes.get("run_on_projects", [])
    402        ):
    403            continue
    404        if "macosx1500" in entry.attributes.get("test_platform", ""):
    405            continue
    406        if "browsertime" not in entry.attributes.get("raptor_try_name", ""):
    407            continue
    408        if "shippable" not in entry.attributes.get("test_platform", ""):
    409            continue
    410        if "android" in entry.attributes.get("test_platform", ""):
    411            # Bug 1786254 - The backfill bot is scheduling too many tests atm
    412            continue
    413        exceptions = ("live", "profiling", "youtube-playback")
    414        if any(e in entry.attributes.get("raptor_try_name", "") for e in exceptions):
    415            continue
    416        # Only run on allowed browser applications.
    417        raptor_try_name = entry.attributes.get("raptor_try_name", "")
    418        if not any(app in raptor_try_name for app in ALLOWED_PERFTEST_BACKFILL_APPS):
    419            continue
    420        if "firefox" in raptor_try_name and entry.attributes.get(
    421            "test_platform", ""
    422        ).endswith("64-shippable-qr/opt"):
    423            # add the browsertime test
    424            if label not in label_to_taskid:
    425                to_run.append(label)
    426        if "geckoview" in raptor_try_name:
    427            # add the pageload test
    428            if label not in label_to_taskid:
    429                to_run.append(label)
    430    return to_run
    431 
    432 
    433 @register_callback_action(
    434    name="add-all-browsertime",
    435    title="Add All Browsertime Tests.",
    436    permission="backfill",
    437    symbol="aaB",
    438    description="This action is normally scheduled by the backfill-all-browsertime action. "
    439    "The intent is to schedule all browsertime tests on a specific pushe.",
    440    order=900,
    441    context=[],
    442 )
    443 def add_all_browsertime(parameters, graph_config, input, task_group_id, task_id):
    444    """
    445    This action is normally scheduled by the backfill-all-browsertime action. The intent is to
    446    trigger all browsertime tasks for the current revision.
    447 
    448    The push in which we want to schedule a new task is defined by the parameters object.
    449 
    450    To execute this action locally follow the documentation here:
    451    https://taskcluster-taskgraph.readthedocs.io/en/latest/howto/create-actions.html#testing-actions
    452    """
    453    logger.info("Retreving the full task graph and labels.")
    454    decision_task_id, full_task_graph, label_to_taskid, _ = fetch_graph_and_labels(
    455        parameters, graph_config
    456    )
    457 
    458    to_run = filter_raptor_jobs(full_task_graph, label_to_taskid, parameters["project"])
    459 
    460    create_tasks(
    461        graph_config,
    462        to_run,
    463        full_task_graph,
    464        label_to_taskid,
    465        parameters,
    466        decision_task_id,
    467    )
    468    logger.info(f"Scheduled {len(to_run)} raptor tasks (time 1)")