tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

archiver.py (2989B)


      1 # This Source Code Form is subject to the terms of the Mozilla Public
      2 # License, v. 2.0. If a copy of the MPL was not distributed with this
      3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      4 """Helper to create tarballs."""
      5 
      6 import copy
      7 import glob
      8 import os
      9 import tarfile
     10 
     11 from condprof import progress
     12 from condprof.util import TASK_CLUSTER
     13 
     14 
     15 def _tarinfo2mem(tar, tarinfo):
     16    metadata = copy.copy(tarinfo)
     17    try:
     18        data = tar.extractfile(tarinfo)
     19        if data is not None:
     20            data = data.read()
     21    except Exception:
     22        data = None
     23 
     24    return metadata, data
     25 
     26 
     27 class Archiver:
     28    def __init__(self, scenario, profile_dir, archives_dir):
     29        self.profile_dir = profile_dir
     30        self.archives_dir = archives_dir
     31        self.scenario = scenario
     32 
     33    def _strftime(self, date, template="-%Y-%m-%d-hp.tar.gz"):
     34        return date.strftime(self.scenario + template)
     35 
     36    def _get_archive_path(self, when):
     37        archive = self._strftime(when)
     38        return os.path.join(self.archives_dir, archive), archive
     39 
     40    def create_archive(self, when, iterator=None):
     41 
     42        def _filter(tarinfo):
     43            name = tarinfo.name
     44            if name.endswith((".dmp", ".extra")) and "minidumps" in name:
     45                # Inore crash files such as:
     46                # - minidumps/5b2d4a13-54e6-5ebb-9a6f-913a3451e56a.dmp
     47                # - minidumps/5b2d4a13-54e6-5ebb-9a6f-913a3451e56a.extra
     48                # ... because they can cause permafailing tests (bug 2007615).
     49                #
     50                # We are excluding them here instead of removing the crash dump
     51                # from the filesystem before archival, in case anyone wants to
     52                # inspect these files.
     53                return None
     54            return tarinfo
     55 
     56        if iterator is None:
     57 
     58            def _files(tar):
     59                files = glob.glob(os.path.join(self.profile_dir, "*"))
     60                yield len(files)
     61                for filename in files:
     62                    try:
     63                        tar.add(filename, os.path.basename(filename), filter=_filter)
     64                        yield filename
     65                    except FileNotFoundError:  # NOQA
     66                        # locks and such
     67                        pass
     68 
     69            iterator = _files
     70 
     71        if isinstance(when, str):
     72            archive = when
     73        else:
     74            archive, __ = self._get_archive_path(when)
     75 
     76        with tarfile.open(archive, "w:gz", dereference=True) as tar:
     77            it = iterator(tar)
     78            size = next(it)
     79            with progress.Bar(expected_size=size) as bar:
     80                for filename in it:
     81                    if not TASK_CLUSTER:
     82                        bar.show(bar.last_progress + 1)
     83 
     84        return archive
     85 
     86    def _read_tar(self, filename):
     87        files = {}
     88        with tarfile.open(filename, "r:gz") as tar:
     89            for tarinfo in tar:
     90                files[tarinfo.name] = _tarinfo2mem(tar, tarinfo)
     91        return files