utils.py (7575B)
# mypy: allow-untyped-defs

import errno
import logging
import os
import shutil
import stat
import subprocess
import sys
import tarfile
import time
import zipfile
from io import BytesIO
from socket import error as SocketError  # NOQA: N812
from urllib.request import urlopen

logger = logging.getLogger(__name__)


def call(*args):
    """Log terminal command, invoke it as a subprocess.

    Returns a bytestring of the subprocess output if no error.
    Raises subprocess.CalledProcessError (after logging the failure) when
    the command exits non-zero.
    """
    logger.debug(" ".join(args))
    try:
        return subprocess.check_output(args).decode('utf8')
    except subprocess.CalledProcessError as e:
        # Lazy %-style args: formatting happens only if the record is emitted.
        logger.critical("%s exited with return code %i", e.cmd, e.returncode)
        logger.critical(e.output)
        raise


def seekable(fileobj):
    """Attempt to use file.seek on given file, with fallbacks.

    Returns ``fileobj`` unchanged when it supports seeking; otherwise drains
    it into an in-memory BytesIO (tarfile/zipfile need a seekable stream).
    """
    try:
        fileobj.seek(fileobj.tell())
    except Exception:
        return BytesIO(fileobj.read())
    else:
        return fileobj


def untar(fileobj, dest="."):
    """Extract tar archive read from ``fileobj`` into directory ``dest``."""
    logger.debug("untar")
    fileobj = seekable(fileobj)
    kwargs = {}
    # Python 3.12+ warns (and later versions raise) unless an extraction
    # filter is chosen explicitly; "tar" keeps the historical behaviour.
    # Compare the whole version tuple: the previous
    # ``major >= 3 and minor >= 12`` check would wrongly skip the filter on
    # any hypothetical X.0..X.11 release with X > 3.
    if sys.version_info >= (3, 12):
        kwargs["filter"] = "tar"
    with tarfile.open(fileobj=fileobj) as tar_data:
        tar_data.extractall(path=dest, **kwargs)


def unzip(fileobj, dest=None, limit=None):
    """Extract zip archive.

    :param fileobj: file-like object with the zip data (made seekable if not).
    :param dest: destination directory; the current directory when None.
    :param limit: optional collection of member names -- when given, only
        those members are extracted.
    """
    logger.debug("unzip")
    fileobj = seekable(fileobj)
    if dest is None:
        # ZipFile.extract(path=None) already means "current directory", but
        # the symlink/chmod branches below build paths themselves and
        # os.path.join(None, ...) raises TypeError -- normalise up front.
        dest = os.curdir
    with zipfile.ZipFile(fileobj) as zip_data:
        for info in zip_data.infolist():
            if limit is not None and info.filename not in limit:
                continue
            # external_attr is 4 bytes; its meaning depends on the system the
            # ZIP was created on (https://unix.stackexchange.com/a/14727):
            # - UNIX: the 2 high bytes hold the st_mode bits (permissions and
            #   file type, sys/stat.h) and the low byte holds DOS FAT
            #   compatibility attributes (mainly the directory bit).
            # - WIN/DOS: the low byte holds DOS FAT attributes (directory,
            #   hidden, read-only, system-file, ...).
            # The DOS FAT attributes can be ignored: ZipFile.extract() already
            # creates directories as needed on every platform, and the other
            # attribute bits are of no interest for our use case. Therefore:
            # 1. When uncompressing on Windows, just call extract().
            # 2. On Unix-like systems, honour the stored permissions only when
            #    the archive was created on a Unix-like system (create_system
            #    != 0); otherwise there is no usable permission info, so just
            #    call extract() without setting any specific file permission.
            if info.create_system == 0 or sys.platform == 'win32':
                zip_data.extract(info, path=dest)
            else:
                stat_st_mode = info.external_attr >> 16
                info_dst_path = os.path.join(dest, info.filename)
                if stat.S_ISLNK(stat_st_mode):
                    # Symlinks are stored in the ZIP as text entries whose
                    # content is the link target; recreate the symlink instead
                    # of calling extract() for stat.S_IFLNK entries.
                    link_src_path = zip_data.read(info)
                    link_dst_dir = os.path.dirname(info_dst_path)
                    # exist_ok avoids a check-then-create race.
                    os.makedirs(link_dst_dir, exist_ok=True)

                    # Remove existing link if it exists.
                    if os.path.islink(info_dst_path):
                        os.unlink(info_dst_path)
                    os.symlink(link_src_path, info_dst_path)
                else:
                    zip_data.extract(info, path=dest)
                    # Preserve bits 0-8 only: rwxrwxrwx (no sticky/setuid/
                    # setgid bits).
                    os.chmod(info_dst_path, stat_st_mode & 0o777)


def get(url):
    """Issue GET request to a given URL and return the response.

    ``requests`` is imported lazily so it is only required when this helper
    is actually used. Raises requests.HTTPError on 4xx/5xx responses.
    """
    import requests

    logger.debug("GET %s", url)
    resp = requests.get(url, stream=True)
    resp.raise_for_status()
    return resp


def get_download_to_descriptor(fd, url, max_retries=5):
    """Download an URL in chunks and save it to a file descriptor
    (truncating it).

    It doesn't close the descriptor, but flushes it on success. It retries
    the download in case of ECONNRESET (with exponential backoff) up to
    ``max_retries`` attempts. This function is meant to download big files
    directly to the disk without caching the whole file in memory.
    """
    if max_retries < 1:
        max_retries = 1
    wait = 2
    for current_retry in range(1, max_retries + 1):
        try:
            logger.info("Downloading %s Try %d/%d", url, current_retry, max_retries)
            resp = urlopen(url)
            # We may come here in a retry; ensure fd is truncated before
            # starting to write.
            fd.seek(0)
            fd.truncate(0)
            while True:
                chunk = resp.read(16 * 1024)
                if not chunk:
                    break  # Download finished
                fd.write(chunk)
            fd.flush()
            # Success
            return
        except SocketError as e:
            if current_retry < max_retries and e.errno == errno.ECONNRESET:
                # Retry with exponential backoff.
                logger.error("Connection reset by peer. Retrying after %ds...", wait)
                time.sleep(wait)
                wait *= 2
            else:
                # Maximum retries reached or unknown error.
                raise


def rmtree(path: str) -> None:
    """Recursively delete ``path``, working around two issues.

    1. Read-only files owned by us (e.g. files extracted from tarballs)
       cannot be deleted directly: on EACCES the entry is chmod'ed writable
       and the delete retried.
    2. On Windows, we sometimes just need to retry in case the file handler
       hasn't been fully released (a common issue); shutil's error hook gives
       the chmod-and-retry a second chance there too.
    """
    def _handle_eacces(func, failed_path, excvalue):
        # Only retry delete operations that failed with "permission denied".
        if func in (os.rmdir, os.remove, os.unlink) and excvalue.errno == errno.EACCES:
            os.chmod(failed_path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)  # 0o777
            func(failed_path)
        else:
            raise excvalue

    if sys.version_info >= (3, 12):
        # ``onerror`` is deprecated since 3.12; ``onexc`` receives the
        # exception instance directly instead of an exc_info tuple.
        return shutil.rmtree(path, onexc=_handle_eacces)
    return shutil.rmtree(
        path, onerror=lambda func, p, exc_info: _handle_eacces(func, p, exc_info[1]))


def sha256sum(file_path):
    """Compute the SHA256 hex digest of the file at ``file_path``."""
    from hashlib import sha256
    # Named ``digest`` (not ``hash``) to avoid shadowing the builtin.
    digest = sha256()
    with open(file_path, 'rb') as f:
        for chunk in iter(lambda: f.read(4096), b''):
            digest.update(chunk)
    return digest.hexdigest()


# see https://docs.python.org/3/whatsnew/3.12.html#imp
def load_source(modname, filename):
    """Load ``filename`` as module ``modname``.

    Replacement for the removed ``imp.load_source``; registers the module in
    ``sys.modules`` before executing it, then returns it.
    """
    import importlib.machinery
    import importlib.util

    loader = importlib.machinery.SourceFileLoader(modname, filename)
    spec = importlib.util.spec_from_file_location(modname, filename, loader=loader)
    module = importlib.util.module_from_spec(spec)
    sys.modules[module.__name__] = module
    loader.exec_module(module)
    return module