tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

copier.py (22354B)


      1 # This Source Code Form is subject to the terms of the Mozilla Public
      2 # License, v. 2.0. If a copy of the MPL was not distributed with this
      3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      4 
      5 import errno
      6 import os
      7 import stat
      8 import sys
      9 from collections import Counter, OrderedDict, defaultdict
     10 from concurrent import futures
     11 
     12 import mozpack.path as mozpath
     13 from mozpack.errors import errors
     14 from mozpack.files import BaseFile, Dest
     15 
     16 
     17 class FileRegistry:
     18    """
     19    Generic container to keep track of a set of BaseFile instances. It
     20    preserves the order under which the files are added, but doesn't keep
     21    track of empty directories (directories are not stored at all).
     22    The paths associated with the BaseFile instances are relative to an
     23    unspecified (virtual) root directory.
     24 
     25        registry = FileRegistry()
     26        registry.add('foo/bar', file_instance)
     27    """
     28 
     29    def __init__(self):
     30        self._files = OrderedDict()
     31        self._required_directories = Counter()
     32        self._partial_paths_cache = {}
     33 
     34    def _partial_paths(self, path):
     35        """
     36        Turn "foo/bar/baz/zot" into ["foo/bar/baz", "foo/bar", "foo"].
     37        """
     38        dir_name = path.rpartition("/")[0]
     39        if not dir_name:
     40            return []
     41 
     42        partial_paths = self._partial_paths_cache.get(dir_name)
     43        if partial_paths:
     44            return partial_paths
     45 
     46        partial_paths = [dir_name] + self._partial_paths(dir_name)
     47 
     48        self._partial_paths_cache[dir_name] = partial_paths
     49        return partial_paths
     50 
     51    def add(self, path, content):
     52        """
     53        Add a BaseFile instance to the container, under the given path.
     54        """
     55        assert isinstance(content, BaseFile)
     56        if path in self._files:
     57            return errors.error("%s already added" % path)
     58        if self._required_directories[path] > 0:
     59            return errors.error("Can't add %s: it is a required directory" % path)
     60        # Check whether any parent of the given path is already stored
     61        partial_paths = self._partial_paths(path)
     62        for partial_path in partial_paths:
     63            if partial_path in self._files:
     64                return errors.error("Can't add %s: %s is a file" % (path, partial_path))
     65        self._files[path] = content
     66        self._required_directories.update(partial_paths)
     67 
     68    def match(self, pattern):
     69        """
     70        Return the list of paths, stored in the container, matching the
     71        given pattern. See the mozpack.path.match documentation for a
     72        description of the handled patterns.
     73        """
     74        if "*" in pattern:
     75            return [p for p in self.paths() if mozpath.match(p, pattern)]
     76        if pattern == "":
     77            return self.paths()
     78        if pattern in self._files:
     79            return [pattern]
     80        return [p for p in self.paths() if mozpath.basedir(p, [pattern]) == pattern]
     81 
     82    def remove(self, pattern):
     83        """
     84        Remove paths matching the given pattern from the container. See the
     85        mozpack.path.match documentation for a description of the handled
     86        patterns.
     87        """
     88        items = self.match(pattern)
     89        if not items:
     90            return errors.error(
     91                "Can't remove %s: %s"
     92                % (pattern, "not matching anything previously added")
     93            )
     94        for i in items:
     95            del self._files[i]
     96            self._required_directories.subtract(self._partial_paths(i))
     97 
     98    def paths(self):
     99        """
    100        Return all paths stored in the container, in the order they were added.
    101        """
    102        return list(self._files)
    103 
    104    def __len__(self):
    105        """
    106        Return number of paths stored in the container.
    107        """
    108        return len(self._files)
    109 
    110    def __contains__(self, pattern):
    111        raise RuntimeError(
    112            "'in' operator forbidden for %s. Use contains()." % self.__class__.__name__
    113        )
    114 
    115    def contains(self, pattern):
    116        """
    117        Return whether the container contains paths matching the given
    118        pattern. See the mozpack.path.match documentation for a description of
    119        the handled patterns.
    120        """
    121        return len(self.match(pattern)) > 0
    122 
    123    def __getitem__(self, path):
    124        """
    125        Return the BaseFile instance stored in the container for the given
    126        path.
    127        """
    128        return self._files[path]
    129 
    130    def __iter__(self):
    131        """
    132        Iterate over all (path, BaseFile instance) pairs from the container.
    133            for path, file in registry:
    134                (...)
    135        """
    136        return iter(self._files.items())
    137 
    138    def required_directories(self):
    139        """
    140        Return the set of directories required by the paths in the container,
    141        in no particular order.  The returned directories are relative to an
    142        unspecified (virtual) root directory (and do not include said root
    143        directory).
    144        """
    145        return set(k for k, v in self._required_directories.items() if v > 0)
    146 
    147    def output_to_inputs_tree(self):
    148        """
    149        Return a dictionary mapping each output path to the set of its
    150        required input paths.
    151 
    152        All paths are normalized.
    153        """
    154        tree = {}
    155        for output, file in self:
    156            output = mozpath.normpath(output)
    157            tree[output] = set(mozpath.normpath(f) for f in file.inputs())
    158        return tree
    159 
    160    def input_to_outputs_tree(self):
    161        """
    162        Return a dictionary mapping each input path to the set of
    163        impacted output paths.
    164 
    165        All paths are normalized.
    166        """
    167        tree = defaultdict(set)
    168        for output, file in self:
    169            output = mozpath.normpath(output)
    170            for input in file.inputs():
    171                input = mozpath.normpath(input)
    172                tree[input].add(output)
    173        return dict(tree)
    174 
    175 
    176 class FileRegistrySubtree:
    177    """A proxy class to give access to a subtree of an existing FileRegistry.
    178 
    179    Note this doesn't implement the whole FileRegistry interface."""
    180 
    181    def __new__(cls, base, registry):
    182        if not base:
    183            return registry
    184        return object.__new__(cls)
    185 
    186    def __init__(self, base, registry):
    187        self._base = base
    188        self._registry = registry
    189 
    190    def _get_path(self, path):
    191        # mozpath.join will return a trailing slash if path is empty, and we
    192        # don't want that.
    193        return mozpath.join(self._base, path) if path else self._base
    194 
    195    def add(self, path, content):
    196        return self._registry.add(self._get_path(path), content)
    197 
    198    def match(self, pattern):
    199        return [
    200            mozpath.relpath(p, self._base)
    201            for p in self._registry.match(self._get_path(pattern))
    202        ]
    203 
    204    def remove(self, pattern):
    205        return self._registry.remove(self._get_path(pattern))
    206 
    207    def paths(self):
    208        return [p for p, f in self]
    209 
    210    def __len__(self):
    211        return len(self.paths())
    212 
    213    def contains(self, pattern):
    214        return self._registry.contains(self._get_path(pattern))
    215 
    216    def __getitem__(self, path):
    217        return self._registry[self._get_path(path)]
    218 
    219    def __iter__(self):
    220        for p, f in self._registry:
    221            if mozpath.basedir(p, [self._base]):
    222                yield mozpath.relpath(p, self._base), f
    223 
    224 
    225 class FileCopyResult:
    226    """Represents results of a FileCopier.copy operation."""
    227 
    228    def __init__(self):
    229        self.updated_files = set()
    230        self.existing_files = set()
    231        self.removed_files = set()
    232        self.removed_directories = set()
    233 
    234    @property
    235    def updated_files_count(self):
    236        return len(self.updated_files)
    237 
    238    @property
    239    def existing_files_count(self):
    240        return len(self.existing_files)
    241 
    242    @property
    243    def removed_files_count(self):
    244        return len(self.removed_files)
    245 
    246    @property
    247    def removed_directories_count(self):
    248        return len(self.removed_directories)
    249 
    250 
    251 class FileCopier(FileRegistry):
    252    """
    253    FileRegistry with the ability to copy the registered files to a separate
    254    directory.
    255    """
    256 
    257    def copy(
    258        self,
    259        destination,
    260        skip_if_older=True,
    261        remove_unaccounted=True,
    262        remove_all_directory_symlinks=True,
    263        remove_empty_directories=True,
    264    ):
    265        """
    266        Copy all registered files to the given destination path. The given
    267        destination can be an existing directory, or not exist at all. It
    268        can't be e.g. a file.
    269        The copy process acts a bit like rsync: files are not copied when they
    270        don't need to (see mozpack.files for details on file.copy).
    271 
    272        By default, files in the destination directory that aren't
    273        registered are removed and empty directories are deleted. In
    274        addition, all directory symlinks in the destination directory
    275        are deleted: this is a conservative approach to ensure that we
    276        never accidently write files into a directory that is not the
    277        destination directory. In the worst case, we might have a
    278        directory symlink in the object directory to the source
    279        directory.
    280 
    281        To disable removing of unregistered files, pass
    282        remove_unaccounted=False. To disable removing empty
    283        directories, pass remove_empty_directories=False. In rare
    284        cases, you might want to maintain directory symlinks in the
    285        destination directory (at least those that are not required to
    286        be regular directories): pass
    287        remove_all_directory_symlinks=False. Exercise caution with
    288        this flag: you almost certainly do not want to preserve
    289        directory symlinks.
    290 
    291        Returns a FileCopyResult that details what changed.
    292        """
    293        assert isinstance(destination, str)
    294        assert not os.path.exists(destination) or os.path.isdir(destination)
    295 
    296        result = FileCopyResult()
    297        have_symlinks = hasattr(os, "symlink")
    298        destination = os.path.normpath(destination)
    299 
    300        # We create the destination directory specially. We can't do this as
    301        # part of the loop doing mkdir() below because that loop munges
    302        # symlinks and permissions and parent directories of the destination
    303        # directory may have their own weird schema. The contract is we only
    304        # manage children of destination, not its parents.
    305        os.makedirs(destination, exist_ok=True)
    306 
    307        # Because we could be handling thousands of files, code in this
    308        # function is optimized to minimize system calls. We prefer CPU time
    309        # in Python over possibly I/O bound filesystem calls to stat() and
    310        # friends.
    311 
    312        required_dirs = set([destination])
    313        required_dirs |= set(
    314            os.path.normpath(os.path.join(destination, d))
    315            for d in self.required_directories()
    316        )
    317 
    318        # Ensure destination directories are in place and proper.
    319        #
    320        # The "proper" bit is important. We need to ensure that directories
    321        # have appropriate permissions or we will be unable to discover
    322        # and write files. Furthermore, we need to verify directories aren't
    323        # symlinks.
    324        #
    325        # Symlinked directories (a symlink whose target is a directory) are
    326        # incompatible with us because our manifest talks in terms of files,
    327        # not directories. If we leave symlinked directories unchecked, we
    328        # would blindly follow symlinks and this might confuse file
    329        # installation. For example, if an existing directory is a symlink
    330        # to directory X and we attempt to install a symlink in this directory
    331        # to a file in directory X, we may create a recursive symlink!
    332        for d in sorted(required_dirs, key=len):
    333            os.makedirs(d, exist_ok=True)
    334 
    335            # We allow the destination to be a symlink because the caller
    336            # is responsible for managing the destination and we assume
    337            # they know what they are doing.
    338            if have_symlinks and d != destination:
    339                st = os.lstat(d)
    340                if stat.S_ISLNK(st.st_mode):
    341                    # While we have remove_unaccounted, it doesn't apply
    342                    # to directory symlinks because if it did, our behavior
    343                    # could be very wrong.
    344                    os.remove(d)
    345                    os.mkdir(d)
    346 
    347            if not os.access(d, os.W_OK):
    348                umask = os.umask(0o077)
    349                os.umask(umask)
    350                os.chmod(d, 0o777 & ~umask)
    351 
    352        if isinstance(remove_unaccounted, FileRegistry):
    353            existing_files = set(
    354                os.path.normpath(os.path.join(destination, p))
    355                for p in remove_unaccounted.paths()
    356            )
    357            existing_dirs = set(
    358                os.path.normpath(os.path.join(destination, p))
    359                for p in remove_unaccounted.required_directories()
    360            )
    361            existing_dirs |= {os.path.normpath(destination)}
    362        else:
    363            # While we have remove_unaccounted, it doesn't apply to empty
    364            # directories because it wouldn't make sense: an empty directory
    365            # is empty, so removing it should have no effect.
    366            existing_dirs = set()
    367            existing_files = set()
    368            for root, dirs, files in os.walk(destination):
    369                # We need to perform the same symlink detection as above.
    370                # os.walk() doesn't follow symlinks into directories by
    371                # default, so we need to check dirs (we can't wait for root).
    372                if have_symlinks:
    373                    filtered = []
    374                    for d in dirs:
    375                        full = os.path.join(root, d)
    376                        st = os.lstat(full)
    377                        if stat.S_ISLNK(st.st_mode):
    378                            # This directory symlink is not a required
    379                            # directory: any such symlink would have been
    380                            # removed and a directory created above.
    381                            if remove_all_directory_symlinks:
    382                                os.remove(full)
    383                                result.removed_files.add(os.path.normpath(full))
    384                            else:
    385                                existing_files.add(os.path.normpath(full))
    386                        else:
    387                            filtered.append(d)
    388 
    389                    dirs[:] = filtered
    390 
    391                existing_dirs.add(os.path.normpath(root))
    392 
    393                for d in dirs:
    394                    existing_dirs.add(os.path.normpath(os.path.join(root, d)))
    395 
    396                for f in files:
    397                    existing_files.add(os.path.normpath(os.path.join(root, f)))
    398 
    399        # Now we reconcile the state of the world against what we want.
    400        dest_files = set()
    401 
    402        # Install files.
    403        #
    404        # Creating/appending new files on Windows/NTFS is slow. So we use a
    405        # thread pool to speed it up significantly. The performance of this
    406        # loop is so critical to common build operations on Linux that the
    407        # overhead of the thread pool is worth avoiding, so we have 2 code
    408        # paths. We also employ a low water mark to prevent thread pool
    409        # creation if number of files is too small to benefit.
    410        copy_results = []
    411        if sys.platform == "win32" and len(self) > 100:
    412            with futures.ThreadPoolExecutor(4) as e:
    413                fs = []
    414                for p, f in self:
    415                    destfile = os.path.normpath(os.path.join(destination, p))
    416                    fs.append((destfile, e.submit(f.copy, destfile, skip_if_older)))
    417 
    418            copy_results = [(path, f.result) for path, f in fs]
    419        else:
    420            for p, f in self:
    421                destfile = os.path.normpath(os.path.join(destination, p))
    422                copy_results.append((destfile, f.copy(destfile, skip_if_older)))
    423 
    424        for destfile, copy_result in copy_results:
    425            dest_files.add(destfile)
    426            if copy_result:
    427                result.updated_files.add(destfile)
    428            else:
    429                result.existing_files.add(destfile)
    430 
    431        # Remove files no longer accounted for.
    432        if remove_unaccounted:
    433            for f in existing_files - dest_files:
    434                # Windows requires write access to remove files.
    435                if os.name == "nt" and not os.access(f, os.W_OK):
    436                    # It doesn't matter what we set permissions to since we
    437                    # will remove this file shortly.
    438                    os.chmod(f, 0o600)
    439 
    440                os.remove(f)
    441                result.removed_files.add(f)
    442 
    443        if not remove_empty_directories:
    444            return result
    445 
    446        # Figure out which directories can be removed. This is complicated
    447        # by the fact we optionally remove existing files. This would be easy
    448        # if we walked the directory tree after installing files. But, we're
    449        # trying to minimize system calls.
    450 
    451        # Start with the ideal set.
    452        remove_dirs = existing_dirs - required_dirs
    453 
    454        # Then don't remove directories if we didn't remove unaccounted files
    455        # and one of those files exists.
    456        if not remove_unaccounted:
    457            parents = set()
    458            pathsep = os.path.sep
    459            for f in existing_files:
    460                path = f
    461                while True:
    462                    # All the paths are normalized and relative by this point,
    463                    # so os.path.dirname would only do extra work.
    464                    dirname = path.rpartition(pathsep)[0]
    465                    if dirname in parents:
    466                        break
    467                    parents.add(dirname)
    468                    path = dirname
    469            remove_dirs -= parents
    470 
    471        # Remove empty directories that aren't required.
    472        for d in sorted(remove_dirs, key=len, reverse=True):
    473            try:
    474                try:
    475                    os.rmdir(d)
    476                except OSError as e:
    477                    if e.errno in (errno.EPERM, errno.EACCES):
    478                        # Permissions may not allow deletion. So ensure write
    479                        # access is in place before attempting to rmdir again.
    480                        os.chmod(d, 0o700)
    481                        os.rmdir(d)
    482                    else:
    483                        raise
    484            except OSError as e:
    485                # If remove_unaccounted is a # FileRegistry, then we have a
    486                # list of directories that may not be empty, so ignore rmdir
    487                # ENOTEMPTY errors for them.
    488                if (
    489                    isinstance(remove_unaccounted, FileRegistry)
    490                    and e.errno == errno.ENOTEMPTY
    491                ):
    492                    continue
    493                raise
    494            result.removed_directories.add(d)
    495 
    496        return result
    497 
    498 
    499 class Jarrer(FileRegistry, BaseFile):
    500    """
    501    FileRegistry with the ability to copy and pack the registered files as a
    502    jar file. Also acts as a BaseFile instance, to be copied with a FileCopier.
    503    """
    504 
    505    def __init__(self, compress=True):
    506        """
    507        Create a Jarrer instance. See mozpack.mozjar.JarWriter documentation
    508        for details on the compress argument.
    509        """
    510        self.compress = compress
    511        self._preload = []
    512        self._compress_options = {}  # Map path to compress boolean option.
    513        FileRegistry.__init__(self)
    514 
    515    def add(self, path, content, compress=None):
    516        FileRegistry.add(self, path, content)
    517        if compress is not None:
    518            self._compress_options[path] = compress
    519 
    520    def copy(self, dest, skip_if_older=True):
    521        """
    522        Pack all registered files in the given destination jar. The given
    523        destination jar may be a path to jar file, or a Dest instance for
    524        a jar file.
    525        If the destination jar file exists, its (compressed) contents are used
    526        instead of the registered BaseFile instances when appropriate.
    527        """
    528 
    529        class DeflaterDest(Dest):
    530            """
    531            Dest-like class, reading from a file-like object initially, but
    532            switching to a Deflater object if written to.
    533 
    534                dest = DeflaterDest(original_file)
    535                dest.read()      # Reads original_file
    536                dest.write(data) # Creates a Deflater and write data there
    537                dest.read()      # Re-opens the Deflater and reads from it
    538            """
    539 
    540            def __init__(self, orig=None, compress=True):
    541                self.mode = None
    542                self.deflater = orig
    543                self.compress = compress
    544 
    545            def read(self, length=-1):
    546                if self.mode != "r":
    547                    assert self.mode is None
    548                    self.mode = "r"
    549                return self.deflater.read(length)
    550 
    551            def write(self, data):
    552                if self.mode != "w":
    553                    from mozpack.mozjar import Deflater
    554 
    555                    self.deflater = Deflater(self.compress)
    556                    self.mode = "w"
    557                self.deflater.write(data)
    558 
    559            def exists(self):
    560                return self.deflater is not None
    561 
    562        if isinstance(dest, str):
    563            dest = Dest(dest)
    564        assert isinstance(dest, Dest)
    565 
    566        from mozpack.mozjar import JarReader, JarWriter
    567 
    568        try:
    569            old_jar = JarReader(fileobj=dest)
    570        except Exception:
    571            old_jar = []
    572 
    573        old_contents = dict([(f.filename, f) for f in old_jar])
    574 
    575        with JarWriter(fileobj=dest, compress=self.compress) as jar:
    576            for path, file in self:
    577                compress = self._compress_options.get(path, self.compress)
    578                if path in old_contents:
    579                    deflater = DeflaterDest(old_contents[path], compress)
    580                else:
    581                    deflater = DeflaterDest(compress=compress)
    582                file.copy(deflater, skip_if_older)
    583                jar.add(path, deflater.deflater, mode=file.mode, compress=compress)
    584            if self._preload:
    585                jar.preload(self._preload)
    586 
    587    def open(self):
    588        raise RuntimeError("unsupported")
    589 
    590    def preload(self, paths):
    591        """
    592        Add the given set of paths to the list of preloaded files. See
    593        mozpack.mozjar.JarWriter documentation for details on jar preloading.
    594        """
    595        self._preload.extend(paths)