tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

serial_boot_device.py (12329B)


      1 #!/usr/bin/env python3
      2 # Copyright 2023 The Chromium Authors
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 """Helpers to reliably reboot the device via serial and fastboot.
      6 
      7 Note, this file will be executed in docker instance without vpython3, so we use
      8 python3 instead. The docker instance runs this file as a symbolic link of dmc
      9 via the "main" function.
     10 """
     11 
import json
import logging
import os
import shutil
import signal
import subprocess
import sys
import time
from typing import List
from typing import Optional

from boot_device import BootMode
from compatible_utils import running_unattended
     24 
     25 # pylint: disable=too-many-return-statements, too-many-branches
     26 
     27 
     28 def _env_ready() -> bool:
     29    """Checks if the required environment is ready to support the functions in
     30    this file."""
     31    if shutil.which('fastboot') is None:
     32        logging.warning('fastboot is not accessible')
     33        return False
     34    if shutil.which('serialio') is None:
     35        logging.warning('serialio is not accessible')
     36        return False
     37    return True
     38 
     39 
     40 def boot_device(node_id: str,
     41                serial_num: str,
     42                mode: BootMode,
     43                must_boot: bool = False) -> bool:
     44    """Boots device into desired mode via serial and fastboot.
     45    This function waits for at most 10 minutes for the transition.
     46 
     47    Args:
     48        node_id: The fuchsia node id of the device.
     49        serial_num: The fastboot serial number of the device.
     50        mode: Desired boot mode.
     51        must_boot: Forces device to reboot regardless the current state.
     52 
     53    Returns:
     54        a boolean value to indicate if the operation succeeded; missing
     55        dependencies like serialio (for serial access) and fastboot, or the
     56        device cannot be found may also introduce the error.
     57    """
     58    #TODO(crbug.com/40935296): Remove the default values once the use in
     59    # flash_device has been migrated.
     60    if node_id is None:
     61        node_id = os.getenv('FUCHSIA_NODENAME')
     62    if serial_num is None:
     63        serial_num = os.getenv('FUCHSIA_FASTBOOT_SERNUM')
     64    assert node_id is not None
     65    assert serial_num is not None
     66 
     67    assert mode in [BootMode.REGULAR, BootMode.BOOTLOADER
     68                    ], 'Unsupported BootMode %s for serial_boot_device.' % mode
     69    assert _env_ready()
     70 
     71    if is_in_fastboot(serial_num):
     72        # fastboot is stateless and there isn't a reason to reboot the device
     73        # again to go to the fastboot.
     74        if mode == BootMode.BOOTLOADER:
     75            return True
     76        if not _run_fastboot(['reboot'], serial_num):
     77            # Shouldn't return None here, unless the device was rebooting. In
     78            # the case, it would be safer to return false.
     79            return False
     80    else:
     81        # Even not must_boot, still check if the device is running fuchsia to
     82        # detect the broken state and force a reboot to recover it.
     83        if is_in_fuchsia(node_id):
     84            if not must_boot and mode == BootMode.REGULAR:
     85                return True
     86        else:
     87            logging.error('Cannot find node id %s or fastboot serial number '
     88                          '%s, the os may run into panic, will try to use dm '
     89                          'to reboot it anyway.',
     90                          node_id, serial_num)
     91        # pylint: disable=subprocess-run-check
     92        if subprocess.run([
     93                'serialio', node_id, 'send', 'dm', 'reboot' +
     94                ('' if mode == BootMode.REGULAR else '-bootloader')
     95        ]).returncode != 0:
     96            logging.error('Failed to send dm reboot[-bootloader] via serialio')
     97            return False
     98 
     99    start_sec = time.time()
    100    while time.time() - start_sec < 600:
    101        assert mode in [BootMode.REGULAR, BootMode.BOOTLOADER]
    102        if mode == BootMode.REGULAR and is_in_fuchsia(node_id):
    103            return True
    104        if mode == BootMode.BOOTLOADER and is_in_fastboot(serial_num):
    105            return True
    106    logging.error(
    107        'Failed to transite node id %s or fastboot serial number %s '
    108        'to expected state %s', node_id, serial_num, mode)
    109    return False
    110 
    111 
    112 def _serialio_send_and_wait(node_id: str, command: List[str],
    113                            waitfor: str) -> bool:
    114    """Continously sends the command to the device and waits for the waitfor
    115    string via serialio.
    116    This function asserts the existence of serialio and waits at most ~30
    117    seconds."""
    118    assert shutil.which('serialio') is not None
    119    start_sec = time.time()
    120    with subprocess.Popen(['serialio', node_id, 'wait', waitfor],
    121                          stdout=subprocess.DEVNULL,
    122                          stderr=subprocess.DEVNULL) as proc:
    123        while time.time() - start_sec < 28:
    124            send_command = ['serialio', node_id, 'send']
    125            send_command.extend(command)
    126            # pylint: disable=subprocess-run-check
    127            if subprocess.run(send_command).returncode != 0:
    128                logging.error('Failed to send %s via serialio to %s', command,
    129                              node_id)
    130                return False
    131            result = proc.poll()
    132            if result is not None:
    133                if result == 0:
    134                    return True
    135                logging.error(
    136                    'Failed to wait %s via serial to %s, '
    137                    'return code %s', waitfor, node_id, result)
    138                return False
    139            time.sleep(2)
    140        proc.kill()
    141    logging.error('Have not found %s via serialio to %s', waitfor, node_id)
    142    return False
    143 
    144 
    145 def is_in_fuchsia(node_id: str) -> bool:
    146    """Checks if the device is running in fuchsia through serial.
    147    Note, this check goes through serial and does not guarantee the fuchsia os
    148    has a workable network or ssh connection.
    149    This function asserts the existence of serialio and waits at most ~60
    150    seconds."""
    151    if not _serialio_send_and_wait(
    152            node_id, ['echo', 'yes-i-am-healthy', '|', 'sha1sum'],
    153            '89d517b7db104aada669a83bc3c3a906e00671f7'):
    154        logging.error(
    155            'Device %s did not respond echo, '
    156            'it may not be running fuchsia', node_id)
    157        return False
    158    if not _serialio_send_and_wait(node_id, ['ps'], 'sshd'):
    159        logging.warning(
    160            'Cannot find sshd from ps on %s, the ssh '
    161            'connection may not be available.', node_id)
    162    return True
    163 
    164 
    165 def is_in_fastboot(serial_num: str) -> bool:
    166    """Checks if the device is running in fastboot through fastboot command.
    167    Note, the fastboot may be impacted by the usb congestion and causes this
    168    function to return false.
    169    This function asserts the existence of fastboot and waits at most ~30
    170    seconds."""
    171    start_sec = time.time()
    172    while time.time() - start_sec < 28:
    173        result = _run_fastboot(['getvar', 'product'], serial_num)
    174        if result is None:
    175            return False
    176        if result:
    177            return True
    178        time.sleep(2)
    179    logging.error('Failed to wait for fastboot state of %s', serial_num)
    180    return False
    181 
    182 
    183 def _run_fastboot(args: List[str], serial_num: str) -> bool:
    184    """Executes the fastboot command and kills the hanging process.
    185    The fastboot may be impacted by the usb congestion and causes the process to
    186    hang forever. So this command waits for 30 seconds before killing the
    187    process, and it's not good for flashing.
    188    Note, if this function detects the fastboot is waiting for the device, i.e.
    189    the device is not in the fastboot, it returns None instead, e.g. unknown.
    190    This function asserts the existence of fastboot."""
    191    assert shutil.which('fastboot') is not None
    192    args.insert(0, 'fastboot')
    193    args.extend(('-s', serial_num))
    194    try:
    195        # Capture output to ensure we can get '< waiting for serial-num >'
    196        # output.
    197        # pylint: disable=subprocess-run-check
    198        if subprocess.run(args, capture_output=True,
    199                          timeout=30).returncode == 0:
    200            return True
    201    except subprocess.TimeoutExpired as timeout:
    202        if timeout.stderr is not None and serial_num.lower(
    203        ) in timeout.stderr.decode().lower():
    204            logging.warning('fastboot is still waiting for %s', serial_num)
    205            return None
    206    logging.error('Failed to run %s against fastboot %s', args, serial_num)
    207    return False
    208 
    209 
    210 def _shutdown_if_serial_is_unavailable(node_id: str) -> None:
    211    if not running_unattended():
    212        return
    213    # pylint: disable=subprocess-run-check
    214    if subprocess.run(['serialio', node_id, 'poll']).returncode != 0:
    215        logging.warning('shutting down the docker by killing the pid 1')
    216        # Before killing the process itself, force shutting down the logging to
    217        # flush everything.
    218        logging.shutdown()
    219        # In docker instance, killing root process will cause the instance to be
    220        # shut down and restarted by swarm_docker. So the updated tty can be
    221        # attached to the new docker instance.
    222        os.kill(1, signal.SIGTERM)
    223 
    224 
    225 def main(action: str) -> int:
    226    """Main entry of serial_boot_device."""
    227    node_id = os.getenv('FUCHSIA_NODENAME')
    228    serial_num = os.getenv('FUCHSIA_FASTBOOT_SERNUM')
    229    assert node_id is not None
    230    assert serial_num is not None
    231 
    232    handlers = [logging.StreamHandler()]
    233    if os.path.isdir('/home/swarming/'):
    234        handlers.append(
    235            logging.FileHandler('/home/swarming/dmc.%s.log' % node_id))
    236    logging.basicConfig(format='%(levelname)s %(asctime)s %(message)s',
    237                        handlers=handlers,
    238                        level=logging.INFO)
    239    logging.info('Running command %s against %s %s', sys.argv, node_id,
    240                 serial_num)
    241 
    242    # Checks the environment after initializing the logging.
    243    if not _env_ready():
    244        logging.error('Missing environment setup, unable to perform action.')
    245        return 2
    246 
    247    if action == 'health-check':
    248        _shutdown_if_serial_is_unavailable(node_id)
    249        if is_in_fuchsia(node_id) or is_in_fastboot(serial_num):
    250            # Print out the json result without using logging to avoid any
    251            # potential formatting issue.
    252            print(
    253                json.dumps([{
    254                    'nodename': node_id,
    255                    'state': 'healthy',
    256                    'status_message': '',
    257                    'dms_state': ''
    258                }]))
    259            return 0
    260        logging.error('Cannot find node id %s or fastboot serial number %s',
    261                      node_id, serial_num)
    262        return 1
    263    if action in ['reboot', 'after-task']:
    264        if action == 'after-task':
    265            _shutdown_if_serial_is_unavailable(node_id)
    266        if boot_device(node_id, serial_num, BootMode.REGULAR, must_boot=True):
    267            return 0
    268        logging.error(
    269            'Cannot reboot the device with node id %s and fastboot '
    270            'serial number %s', node_id, serial_num)
    271        return 1
    272    if action == 'reboot-fastboot':
    273        if boot_device(node_id,
    274                       serial_num,
    275                       BootMode.BOOTLOADER,
    276                       must_boot=True):
    277            return 0
    278        logging.error(
    279            'Cannot reboot the device with node id %s and fastboot '
    280            'serial number %s into fastboot', node_id, serial_num)
    281        return 1
    282    if action == 'is-in-fuchsia':
    283        if is_in_fuchsia(node_id):
    284            return 0
    285        logging.error('Cannot find node id %s', node_id)
    286        return 1
    287    if action == 'is-in-fastboot':
    288        if is_in_fastboot(serial_num):
    289            return 0
    290        logging.error('Cannot find fastboot serial number %s', serial_num)
    291        return 1
    292    if action == 'server-version':
    293        # TODO(crbug.com/40935296): Implement the server-version.
    294        print('chromium')
    295        return 0
    296    if action == 'before-task':
    297        # TODO(crbug.com/40935296): fuchsia.py requires IMAGE_MANIFEST_PATH and
    298        # BOOTSERVER_PATH to support before-task call. So the following
    299        # statement does not work as it should be.
    300        _shutdown_if_serial_is_unavailable(node_id)
    301        return 0
    302    if action == 'set-power-state':
    303        # Do nothing. The device is always restarted during after-task.
    304        return 0
    305    logging.error('Unknown command %s', action)
    306    return 2
    307 
    308 
    309 if __name__ == '__main__':
    310    sys.exit(main(sys.argv[1]))