tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

copy-artifacts-from-ftl.py (13639B)


      1 #!/usr/bin/env python3
      2 
      3 # This Source Code Form is subject to the terms of the Mozilla Public
      4 # License, v. 2.0. If a copy of the MPL was not distributed with this
      5 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      6 
      7 """
      8 This script is designed to automate the process of fetching artifacts (baseline profiles, macrobenchmark
      9 results, memory leak reports or crash logs) from Google Cloud Storage (GCS) for devices in Firebase TestLab.
     10 It is intended to be run as part of a Taskcluster job following a scheduled test task, or as part of
     11 a Taskcluster task that runs baseline profile generation on Firebase TestLab.
     12 The script requires the presence of a `matrix_ids.json` artifact in the results directory
     13 and the availability of the `gsutil` command in the environment.
     14 
     15 The script performs the following operations:
     16 - Loads the `matrix_ids.json` artifact to identify the GCS paths for the artifacts.
     17 - In the case of crash logs, identifies failed devices based on the outcomes specified in the `matrix_ids.json` artifact.
     18 - Fetches the specified artifact type (baseline profiles or crash logs) from the specified GCS paths.
     19 - Copies the fetched artifacts to the current worker artifact results directory.
     20 
     21 The script is configured to log its operations and errors, providing visibility into its execution process.
     22 It uses the `gsutil` command-line tool to interact with GCS, ensuring compatibility with the GCS environment.
     23 
     24 Usage:
     25    python3 copy-artifacts-from-ftl.py <artifact_type>
     26 
     27    artifact_type: "baseline_profile", "macrobenchmark", "crash_log" or "memory_leaks"
     28 
     29 Requirements:
     30    - The `matrix_ids.json` artifact must be present in the results directory.
     31    - The `gsutil` command must be available in the environment.
     32    - The script should be run after a scheduled test task in a Taskcluster job or as part of a
     33        scheduled baseline profile task in a Taskcluster job
     34 
     35 Output:
     36    - Artifacts are copied to the current worker artifact results directory.
     37 """
     38 
     39 import json
     40 import logging
     41 import os
     42 import re
     43 import subprocess
     44 import sys
     45 from enum import Enum
     46 
     47 
     48 def setup_logging():
     49    """Configure logging for the script."""
     50    log_format = "%(message)s"
     51    logging.basicConfig(level=logging.INFO, format=log_format)
     52 
     53 
     54 class Worker(Enum):
     55    """
     56    Worker paths
     57    """
     58 
     59    RESULTS_DIR = "/builds/worker/artifacts/results"
     60    BASELINE_PROFILE_DIR = "/builds/worker/workspace/baselineProfile"
     61    MACROBENCHMARK_DEST = "/builds/worker/artifacts/build/macrobenchmark.json"
     62    MACROBENCHMARK_DIR = "/builds/worker/artifacts/build/macrobenchmark"
     63    MEMORY_LEAKS_DIR = "/builds/worker/artifacts/build/memory_leaks"
     64    ARTIFACTS_DIR = "/builds/worker/artifacts/build"
     65 
     66 
     67 class ArtifactType(Enum):
     68    """
     69    Artifact types for fetching matrix IDs, crash logs and baseline profile.
     70    """
     71 
     72    BASELINE_PROFILE = (
     73        "artifacts/sdcard/Android/media/org.mozilla.fenix.benchmark/*-baseline-prof.txt"
     74    )
     75    CRASH_LOG = "data_app_crash*.txt"
     76    MACROBENCHMARK = (
     77        "artifacts/sdcard/Android/media/org.mozilla.fenix.benchmark/*benchmarkData.json"
     78    )
     79    MATRIX_IDS = "matrix_ids.json"
     80    MEMORY_LEAKS = "artifacts/sdcard/Download/memory_leaks/*.txt"
     81 
     82 
     83 def load_matrix_ids_artifact(matrix_file_path):
     84    """Load the matrix IDs artifact from the specified file path.
     85 
     86    Args:
     87        matrix_file_path (str): The file path to the matrix IDs artifact.
     88    Returns:
     89        dict: The contents of the matrix IDs artifact.
     90    """
     91    try:
     92        with open(matrix_file_path) as f:
     93            return json.load(f)
     94    except FileNotFoundError:
     95        exit_with_error(f"Could not find matrix file: {matrix_file_path}")
     96    except json.JSONDecodeError:
     97        exit_with_error(f"Error decoding matrix file: {matrix_file_path}")
     98 
     99 
    100 def get_gcs_path(matrix_artifact_file):
    101    """
    102    Extract the root GCS path from the matrix artifact file.
    103 
    104    Args:
    105        matrix_artifact_file (dict): The matrix artifact file contents.
    106    Returns:
    107        str: The root GCS path extracted from the matrix artifact file.
    108    """
    109    for matrix in matrix_artifact_file.values():
    110        gcs_path = matrix.get("gcsPath")
    111        if gcs_path:
    112            return gcs_path
    113    return None
    114 
    115 
    116 def check_gsutil_availability():
    117    """
    118    Check the availability of the `gsutil` command in the environment.
    119    Exit the script if `gsutil` is not available.
    120    """
    121    try:
    122        subprocess.run(
    123            ["gsutil", "--version"], capture_output=True, text=True, check=True
    124        )
    125    except Exception as e:
    126        exit_with_error(f"Error executing gsutil: {e}")
    127 
    128 
    129 def fetch_artifacts(root_gcs_path, device, artifact_pattern):
    130    """
    131    Fetch artifacts from the specified GCS path pattern for the given device.
    132 
    133    Args:
    134        root_gcs_path (str): The root GCS path for the artifacts.
    135        device (str): The device name for which to fetch artifacts.
    136        artifact_pattern (str): The pattern to match the artifacts.
    137    Returns:
    138        list: A list of artifacts matching the specified pattern.
    139    """
    140    gcs_path = f"gs://{root_gcs_path.rstrip('/')}/{device}*/{artifact_pattern}"
    141 
    142    try:
    143        result = subprocess.check_output(["gsutil", "ls", gcs_path], text=True)
    144        return result.splitlines()
    145    except subprocess.CalledProcessError as e:
    146        if "AccessDeniedException" in e.output:
    147            logging.error(f"Permission denied for GCS path: {gcs_path}")
    148        elif "network error" in e.output.lower():
    149            logging.error(f"Network error accessing GCS path: {gcs_path}")
    150        else:
    151            logging.error(f"Failed to list files: {e.output}")
    152        return []
    153    except Exception as e:
    154        logging.error(f"Error executing gsutil: {e}")
    155        return []
    156 
    157 
    158 def fetch_device_names(matrix_artifact_file, only_failed=False):
    159    """
    160    Fetch the names of devices that were used based on the outcomes specified in the matrix artifact file.
    161 
    162    Args:
    163        matrix_artifact_file (dict): The matrix artifact file contents.
    164        only_failed (bool): If True, only return devices with failed outcomes.
    165    Returns:
    166        list: A list of device names.
    167    """
    168    devices = []
    169    for matrix in matrix_artifact_file.values():
    170        axes = matrix.get("axes", [])
    171        for axis in axes:
    172            if not only_failed or axis.get("outcome") == "failure":
    173                device = axis.get("device")
    174                if device:
    175                    devices.append(device)
    176    return devices
    177 
    178 
    179 def gsutil_cp(artifact, dest):
    180    """
    181    Copy the specified artifact to the destination path using `gsutil`.
    182 
    183    Args:
    184        artifact (str): The path to the artifact to copy.
    185        dest (str): The destination path to copy the artifact to.
    186    Returns:
    187        None
    188    """
    189    logging.info(f"Copying {artifact} to {dest}")
    190    try:
    191        result = subprocess.run(
    192            ["gsutil", "cp", artifact, dest],
    193            check=False,
    194            capture_output=True,
    195            text=True,
    196        )
    197        if result.returncode != 0:
    198            if "AccessDeniedException" in result.stderr:
    199                logging.error(f"Permission denied for GCS path: {artifact}")
    200            elif "network error" in result.stderr.lower():
    201                logging.error(f"Network error accessing GCS path: {artifact}")
    202            else:
    203                logging.error(f"Failed to list files: {result.stderr}")
    204    except Exception as e:
    205        logging.error(f"Error executing gsutil: {e}")
    206 
    207 
    208 def parse_crash_log(log_path):
    209    """Parse the crash log and log any crash stacks in a specific format."""
    210    crashes_reported = 0
    211    if os.path.isfile(log_path):
    212        with open(log_path) as f:
    213            contents = f.read()
    214            proc = "unknown"
    215            match = re.search(r"Process: (.*)\n", contents, re.MULTILINE)
    216            if match and len(match.groups()) == 1:
    217                proc = match.group(1)
    218            match = re.search(
    219                r"\n([\w\.]+[:\s\w\.,!?#^\'\"]+)\s*(at\s.*\n)", contents, re.MULTILINE
    220            )
    221            if match and len(match.groups()) == 2:
    222                top_frame = match.group(1).rstrip() + " " + match.group(2)
    223                remainder = contents[match.span()[1] :]
    224                logging.error(f"PROCESS-CRASH | {proc} | {top_frame}{remainder}")
    225                crashes_reported = 1
    226    return crashes_reported
    227 
    228 
    229 def process_artifacts(artifact_type):
    230    """
    231    Process the artifacts based on the specified artifact type.
    232 
    233    Args:
    234        artifact_type (ArtifactType): The type of artifact to process.
    235    """
    236 
    237    matrix_ids_artifact = load_matrix_ids_artifact(
    238        Worker.RESULTS_DIR.value + "/" + ArtifactType.MATRIX_IDS.value
    239    )
    240    only_get_devices_with_failure = artifact_type == ArtifactType.CRASH_LOG
    241    device_names = fetch_device_names(
    242        matrix_ids_artifact, only_get_devices_with_failure
    243    )
    244 
    245    if not device_names:
    246        if artifact_type == ArtifactType.CRASH_LOG:
    247            logging.info(
    248                "No devices with failure outcomes found - skipping crash log collection."
    249            )
    250            return
    251        else:
    252            exit_with_error("Could not find any device in matrix file.")
    253 
    254    root_gcs_path = get_gcs_path(matrix_ids_artifact)
    255    if not root_gcs_path:
    256        exit_with_error("Could not find root GCS path in matrix file.")
    257 
    258    if artifact_type == ArtifactType.BASELINE_PROFILE:
    259        return process_baseline_profile_artifacts(root_gcs_path, device_names)
    260    elif artifact_type == ArtifactType.MACROBENCHMARK:
    261        return process_macrobenchmark_artifact(root_gcs_path, device_names)
    262    elif artifact_type == ArtifactType.MEMORY_LEAKS:
    263        return process_memory_leaks_artifacts(root_gcs_path, device_names)
    264    else:
    265        return process_crash_artifacts(root_gcs_path, device_names)
    266 
    267 
    268 def process_baseline_profile_artifacts(root_gcs_path, device_names):
    269    device = device_names[0]
    270    artifacts = fetch_artifacts(
    271        root_gcs_path, device, ArtifactType.BASELINE_PROFILE.value
    272    )
    273    if not artifacts:
    274        exit_with_error(f"No baseline profile artifacts found for device: {device}")
    275 
    276    downloaded_files = []
    277 
    278    for artifact in artifacts:
    279        base_name = os.path.basename(artifact)
    280        dest_path = os.path.join(Worker.BASELINE_PROFILE_DIR.value, base_name)
    281        count = 1
    282 
    283        # If file exists, find a unique name
    284        while os.path.exists(dest_path):
    285            name, extension = os.path.splitext(base_name)
    286            dest_path = os.path.join(
    287                Worker.BASELINE_PROFILE_DIR.value, f"{name}_{count}{extension}"
    288            )
    289            count += 1
    290 
    291        gsutil_cp(artifact, dest_path)
    292        downloaded_files.append(dest_path)
    293 
    294 
    295 def process_macrobenchmark_artifact(root_gcs_path, device_names):
    296    device = device_names[0]
    297    artifacts = fetch_artifacts(
    298        root_gcs_path, device, ArtifactType.MACROBENCHMARK.value
    299    )
    300    if not artifacts:
    301        exit_with_error(f"No macrobenchmark artifacts found for device: {device}")
    302 
    303    downloaded_files = []
    304 
    305    for artifact in artifacts:
    306        base_name = os.path.basename(artifact)
    307        ## TODO: Maybe get the name from the shard number
    308        dest_path = os.path.join(Worker.MACROBENCHMARK_DIR.value, base_name)
    309        count = 1
    310 
    311        # If file exists, find a unique name
    312        while os.path.exists(dest_path):
    313            name, extension = os.path.splitext(base_name)
    314            dest_path = os.path.join(
    315                Worker.MACROBENCHMARK_DIR.value, f"{name}_{count}{extension}"
    316            )
    317            count += 1
    318 
    319        gsutil_cp(artifact, dest_path)
    320        downloaded_files.append(dest_path)
    321 
    322 
    323 def process_memory_leaks_artifacts(root_gcs_path, device_names):
    324    for device in device_names:
    325        artifacts = fetch_artifacts(
    326            root_gcs_path, device, ArtifactType.MEMORY_LEAKS.value
    327        )
    328        if not artifacts:
    329            logging.info(f"No artifacts found for device: {device}")
    330            continue
    331        for artifact in artifacts:
    332            base_name = os.path.basename(artifact)
    333            dest_path = os.path.join(Worker.MEMORY_LEAKS_DIR.value, f"leak_{base_name}")
    334 
    335            gsutil_cp(artifact, dest_path)
    336 
    337 
    338 def process_crash_artifacts(root_gcs_path, failed_device_names):
    339    crashes_reported = 0
    340    for device in failed_device_names:
    341        artifacts = fetch_artifacts(root_gcs_path, device, ArtifactType.CRASH_LOG.value)
    342        if not artifacts:
    343            logging.info(f"No artifacts found for device: {device}")
    344            continue
    345 
    346        for artifact in artifacts:
    347            gsutil_cp(artifact, Worker.RESULTS_DIR.value)
    348            crashes_reported += parse_crash_log(
    349                os.path.join(Worker.RESULTS_DIR.value, os.path.basename(artifact))
    350            )
    351 
    352    return crashes_reported
    353 
    354 
    355 def exit_with_error(message):
    356    logging.error(message)
    357    sys.exit(1)
    358 
    359 
    360 def main():
    361    setup_logging()
    362    check_gsutil_availability()
    363 
    364    if len(sys.argv) < 2:
    365        logging.error("Usage: python script_name.py <artifact_type>")
    366        sys.exit(1)
    367 
    368    artifact_type_arg = sys.argv[1]
    369    if artifact_type_arg == "baseline_profile":
    370        process_artifacts(ArtifactType.BASELINE_PROFILE)
    371    elif artifact_type_arg == "macrobenchmark":
    372        process_artifacts(ArtifactType.MACROBENCHMARK)
    373    elif artifact_type_arg == "crash_log":
    374        process_artifacts(ArtifactType.CRASH_LOG)
    375    elif artifact_type_arg == "memory_leaks":
    376        process_artifacts(ArtifactType.MEMORY_LEAKS)
    377    else:
    378        logging.error(
    379            "Invalid artifact type. Use one of 'baseline_profile', 'macrobenchmark', 'crash_log or 'memory_leaks."
    380        )
    381        sys.exit(1)
    382 
    383 
    384 if __name__ == "__main__":
    385    sys.exit(main())