tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

utils.py (7575B)


      1 # mypy: allow-untyped-defs
      2 
      3 import errno
      4 import logging
      5 import os
      6 import shutil
      7 import stat
      8 import subprocess
      9 import sys
     10 import tarfile
     11 import time
     12 import zipfile
     13 from io import BytesIO
     14 from socket import error as SocketError  # NOQA: N812
     15 from urllib.request import urlopen
     16 
     17 logger = logging.getLogger(__name__)
     18 
     19 
     20 def call(*args):
     21    """Log terminal command, invoke it as a subprocess.
     22 
     23    Returns a bytestring of the subprocess output if no error.
     24    """
     25    logger.debug(" ".join(args))
     26    try:
     27        return subprocess.check_output(args).decode('utf8')
     28    except subprocess.CalledProcessError as e:
     29        logger.critical("%s exited with return code %i" %
     30                        (e.cmd, e.returncode))
     31        logger.critical(e.output)
     32        raise
     33 
     34 
     35 def seekable(fileobj):
     36    """Attempt to use file.seek on given file, with fallbacks."""
     37    try:
     38        fileobj.seek(fileobj.tell())
     39    except Exception:
     40        return BytesIO(fileobj.read())
     41    else:
     42        return fileobj
     43 
     44 
     45 def untar(fileobj, dest="."):
     46    """Extract tar archive."""
     47    logger.debug("untar")
     48    fileobj = seekable(fileobj)
     49    kwargs = {}
     50    if sys.version_info.major >= 3 and sys.version_info.minor >= 12:
     51        kwargs["filter"] = "tar"
     52    with tarfile.open(fileobj=fileobj) as tar_data:
     53        tar_data.extractall(path=dest, **kwargs)
     54 
     55 
     56 def unzip(fileobj, dest=None, limit=None):
     57    """Extract zip archive."""
     58    logger.debug("unzip")
     59    fileobj = seekable(fileobj)
     60    with zipfile.ZipFile(fileobj) as zip_data:
     61        for info in zip_data.infolist():
     62            if limit is not None and info.filename not in limit:
     63                continue
     64            # external_attr has a size of 4 bytes and the info it contains depends on the system where the ZIP file was created.
     65            # - If the Zipfile was created on an UNIX environment, then the 2 highest bytes represent UNIX permissions and file
     66            #   type bits (sys/stat.h st_mode entry on struct stat) and the lowest byte represents DOS FAT compatibility attributes
     67            #   (used mainly to store the directory bit).
     68            # - If the ZipFile was created on a WIN/DOS environment then the lowest byte represents DOS FAT file attributes
     69            #   (those attributes are: directory bit, hidden bit, read-only bit, system-file bit, etc).
     70            # More info at https://unix.stackexchange.com/a/14727 and https://forensicswiki.xyz/page/ZIP
     71            # So, we can ignore the DOS FAT attributes because python ZipFile.extract() already takes care of creating the directories
     72            # as needed (both on win and *nix) and the other DOS FAT attributes (hidden/read-only/system-file/etc) are not interesting
     73            # here (not even on Windows, since we don't care about setting those extra attributes for our use case).
     74            # So we do this:
     75            #   1. When uncompressing on a Windows system we just call to extract().
     76            #   2. When uncompressing on an Unix-like system we only take care of the attributes if the zip file was created on an
     77            #      Unix-like system, otherwise we don't have any info about the file permissions other than the DOS FAT attributes,
     78            #      which are useless here, so just call to extract() without setting any specific file permission in that case.
     79            if info.create_system == 0 or sys.platform == 'win32':
     80                zip_data.extract(info, path=dest)
     81            else:
     82                stat_st_mode = info.external_attr >> 16
     83                info_dst_path = os.path.join(dest, info.filename)
     84                if stat.S_ISLNK(stat_st_mode):
     85                    # Symlinks are stored in the ZIP file as text files that contain inside the target filename of the symlink.
     86                    # Recreate the symlink instead of calling extract() when an entry with the attribute stat.S_IFLNK is detected.
     87                    link_src_path = zip_data.read(info)
     88                    link_dst_dir = os.path.dirname(info_dst_path)
     89                    if not os.path.isdir(link_dst_dir):
     90                        os.makedirs(link_dst_dir)
     91 
     92                    # Remove existing link if exists.
     93                    if os.path.islink(info_dst_path):
     94                        os.unlink(info_dst_path)
     95                    os.symlink(link_src_path, info_dst_path)
     96                else:
     97                    zip_data.extract(info, path=dest)
     98                    # Preserve bits 0-8 only: rwxrwxrwx (no sticky/setuid/setgid bits).
     99                    perm = stat_st_mode & 0x1FF
    100                    os.chmod(info_dst_path, perm)
    101 
    102 
    103 def get(url):
    104    """Issue GET request to a given URL and return the response."""
    105    import requests
    106 
    107    logger.debug("GET %s" % url)
    108    resp = requests.get(url, stream=True)
    109    resp.raise_for_status()
    110    return resp
    111 
    112 
    113 def get_download_to_descriptor(fd, url, max_retries=5):
    114    """Download an URL in chunks and saves it to a file descriptor (truncating it)
    115    It doesn't close the descriptor, but flushes it on success.
    116    It retries the download in case of ECONNRESET up to max_retries.
    117    This function is meant to download big files directly to the disk without
    118    caching the whole file in memory.
    119    """
    120    if max_retries < 1:
    121        max_retries = 1
    122    wait = 2
    123    for current_retry in range(1, max_retries+1):
    124        try:
    125            logger.info("Downloading %s Try %d/%d" % (url, current_retry, max_retries))
    126            resp = urlopen(url)
    127            # We may come here in a retry, ensure to truncate fd before start writing.
    128            fd.seek(0)
    129            fd.truncate(0)
    130            while True:
    131                chunk = resp.read(16*1024)
    132                if not chunk:
    133                    break  # Download finished
    134                fd.write(chunk)
    135            fd.flush()
    136            # Success
    137            return
    138        except SocketError as e:
    139            if current_retry < max_retries and e.errno == errno.ECONNRESET:
    140                # Retry
    141                logger.error("Connection reset by peer. Retrying after %ds..." % wait)
    142                time.sleep(wait)
    143                wait *= 2
    144            else:
    145                # Maximum retries or unknown error
    146                raise
    147 
    148 def rmtree(path: str) -> None:
    149    # This works around two issues:
    150    # 1. Cannot delete read-only files owned by us (e.g. files extracted from tarballs)
    151    # 2. On Windows, we sometimes just need to retry in case the file handler
    152    #    hasn't been fully released (a common issue).
    153    def handle_remove_readonly(func, path, exc):
    154        excvalue = exc[1]
    155        if func in (os.rmdir, os.remove, os.unlink) and excvalue.errno == errno.EACCES:
    156            os.chmod(path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)  # 0777
    157            func(path)
    158        else:
    159            raise
    160 
    161    return shutil.rmtree(path, onerror=handle_remove_readonly)
    162 
    163 
    164 def sha256sum(file_path):
    165    """Computes the SHA256 hash sum of a file"""
    166    from hashlib import sha256
    167    hash = sha256()
    168    with open(file_path, 'rb') as f:
    169        for chunk in iter(lambda: f.read(4096), b''):
    170            hash.update(chunk)
    171    return hash.hexdigest()
    172 
    173 
    174 # see https://docs.python.org/3/whatsnew/3.12.html#imp
    175 def load_source(modname, filename):
    176    import importlib.machinery
    177    import importlib.util
    178 
    179    loader = importlib.machinery.SourceFileLoader(modname, filename)
    180    spec = importlib.util.spec_from_file_location(modname, filename, loader=loader)
    181    module = importlib.util.module_from_spec(spec)
    182    sys.modules[module.__name__] = module
    183    loader.exec_module(module)
    184    return module