archiver.py (2989B)
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""Helper to create tarballs."""

import copy
import glob
import os
import tarfile

from condprof import progress
from condprof.util import TASK_CLUSTER


def _tarinfo2mem(tar, tarinfo):
    """Load one archive member into memory.

    Args:
        tar: the open ``tarfile.TarFile`` that contains ``tarinfo``.
        tarinfo: the ``TarInfo`` entry to read.

    Returns:
        A ``(metadata, data)`` tuple. ``metadata`` is a shallow copy of
        ``tarinfo``. ``data`` is the member's content as bytes, or ``None``
        when ``extractfile`` returns no file object or when reading fails
        for any reason.
    """
    metadata = copy.copy(tarinfo)
    data = None
    try:
        member = tar.extractfile(tarinfo)
        if member is not None:
            # Close the extracted file object explicitly instead of leaving
            # one open handle per member for the garbage collector.
            try:
                data = member.read()
            finally:
                member.close()
    except Exception:
        # Deliberate best effort: an unreadable member is reported as
        # having no data rather than aborting the caller.
        data = None

    return metadata, data


class Archiver:
    """Create and read gzip-compressed tarballs of a profile directory."""

    def __init__(self, scenario, profile_dir, archives_dir):
        """Store the archive settings.

        Args:
            scenario: string used as the prefix of generated archive names.
            profile_dir: directory whose top-level entries get archived.
            archives_dir: directory under which archive paths are built.
        """
        self.profile_dir = profile_dir
        self.archives_dir = archives_dir
        self.scenario = scenario

    def _strftime(self, date, template="-%Y-%m-%d-hp.tar.gz"):
        """Return the archive file name for ``date``.

        The scenario is prepended to ``template`` before formatting, so it
        is part of the ``strftime`` format string: any ``%`` directive it
        contains is expanded as well.
        """
        return date.strftime(self.scenario + template)

    def _get_archive_path(self, when):
        """Return ``(full_path, file_name)`` of the archive dated ``when``."""
        archive = self._strftime(when)
        return os.path.join(self.archives_dir, archive), archive

    def create_archive(self, when, iterator=None):
        """Write a ``.tar.gz`` archive and return its path.

        Args:
            when: either the destination path as a ``str``, or an object
                with a ``strftime`` method from which the path under
                ``archives_dir`` is derived.
            iterator: optional callable that takes the open ``TarFile`` and
                returns an iterator. The first item must be the expected
                number of entries; every following item marks one unit of
                progress. The callable is responsible for adding files to
                the tar. Defaults to archiving each top-level entry of
                ``profile_dir``.

        Returns:
            The path of the archive that was written.
        """

        def _filter(tarinfo):
            name = tarinfo.name
            if name.endswith((".dmp", ".extra")) and "minidumps" in name:
                # Ignore crash files such as:
                # - minidumps/5b2d4a13-54e6-5ebb-9a6f-913a3451e56a.dmp
                # - minidumps/5b2d4a13-54e6-5ebb-9a6f-913a3451e56a.extra
                # ... because they can cause permafailing tests (bug 2007615).
                #
                # We are excluding them here instead of removing the crash dump
                # from the filesystem before archival, in case anyone wants to
                # inspect these files.
                return None
            return tarinfo

        if iterator is None:

            def _files(tar):
                files = glob.glob(os.path.join(self.profile_dir, "*"))
                # The first yielded value is the expected size for the
                # progress bar; the file names follow.
                yield len(files)
                for filename in files:
                    try:
                        tar.add(filename, os.path.basename(filename), filter=_filter)
                        yield filename
                    except FileNotFoundError:  # NOQA
                        # locks and such
                        pass

            iterator = _files

        if isinstance(when, str):
            archive = when
        else:
            archive, __ = self._get_archive_path(when)

        with tarfile.open(archive, "w:gz", dereference=True) as tar:
            it = iterator(tar)
            size = next(it)
            with progress.Bar(expected_size=size) as bar:
                # The iterator must be drained even when the bar is not
                # refreshed: consuming it is what adds files to the tar.
                for _ in it:
                    if not TASK_CLUSTER:
                        bar.show(bar.last_progress + 1)

        return archive

    def _read_tar(self, filename):
        """Read every member of a ``.tar.gz`` archive into memory.

        Returns:
            A dict mapping each member name to the ``(metadata, data)``
            tuple produced by ``_tarinfo2mem``.
        """
        files = {}
        with tarfile.open(filename, "r:gz") as tar:
            for tarinfo in tar:
                files[tarinfo.name] = _tarinfo2mem(tar, tarinfo)
        return files