tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

archive.py (5951B)


      1 # This Source Code Form is subject to the terms of the Mozilla Public
      2 # License, v. 2.0. If a copy of the MPL was not distributed with this
      3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      4 
      5 import bz2
      6 import gzip
      7 import stat
      8 import tarfile
      9 
     10 from .files import BaseFile, File
     11 
     12 # 2016-01-01T00:00:00+0000
     13 DEFAULT_MTIME = 1451606400
     14 
     15 
     16 # Python 3.9 contains this change:
     17 #  https://github.com/python/cpython/commit/674935b8caf33e47c78f1b8e197b1b77a04992d2
     18 # which changes the output of tar creation compared to earlier versions.
     19 # This code is used to generate tar files that are meant to be deterministic
     20 # across versions of python (specifically, it's used as part of computing the hash
     21 # of docker images, which needs to be identical between CI (which uses python 3.8),
     22 # and developer environments (using arbitrary versions of python, at this point,
     23 # most probably more recent than 3.9)).
     24 # What we do is subclass TarInfo so that if used on python >= 3.9, it reproduces the
     25 # behavior from python < 3.9.
     26 # Here's how it goes:
     27 # - the behavior in python >= 3.9 is the same as python < 3.9 when the type encoded
     28 # in the tarinfo is CHRTYPE or BLKTYPE.
     29 # - the value of the type is only compared in the context of choosing which behavior
     30 # to take
     31 # - we replace the type with the same value (so that using the value has no changes)
     32 # but that pretends to be the same as CHRTYPE so that the condition that enables the
     33 # old behavior is taken.
     34 class HackedType(bytes):
     35    def __eq__(self, other):
     36        if other == tarfile.CHRTYPE:
     37            return True
     38        return self == other
     39 
     40 
     41 class TarInfo(tarfile.TarInfo):
     42    @staticmethod
     43    def _create_header(info, format, encoding, errors):
     44        info["type"] = HackedType(info["type"])
     45        return tarfile.TarInfo._create_header(info, format, encoding, errors)
     46 
     47 
     48 def create_tar_from_files(fp, files):
     49    """Create a tar file deterministically.
     50 
     51    Receives a dict mapping names of files in the archive to local filesystem
     52    paths or ``mozpack.files.BaseFile`` instances.
     53 
     54    The files will be archived and written to the passed file handle opened
     55    for writing.
     56 
     57    Only regular files can be written.
     58 
     59    FUTURE accept a filename argument (or create APIs to write files)
     60    """
     61    # The format is explicitly set to tarfile.GNU_FORMAT, because this default format
     62    # has been changed in Python 3.8.
     63    with tarfile.open(
     64        name="", mode="w", fileobj=fp, dereference=True, format=tarfile.GNU_FORMAT
     65    ) as tf:
     66        for archive_path, f in sorted(files.items()):
     67            if not isinstance(f, BaseFile):
     68                f = File(f)
     69 
     70            ti = TarInfo(archive_path)
     71            ti.mode = f.mode or 0o0644
     72            ti.type = tarfile.REGTYPE
     73 
     74            if not ti.isreg():
     75                raise ValueError("not a regular file: %s" % f)
     76 
     77            # Disallow setuid and setgid bits. This is an arbitrary restriction.
     78            # However, since we set uid/gid to root:root, setuid and setgid
     79            # would be a glaring security hole if the archive were
     80            # uncompressed as root.
     81            if ti.mode & (stat.S_ISUID | stat.S_ISGID):
     82                raise ValueError("cannot add file with setuid or setgid set: %s" % f)
     83 
     84            # Set uid, gid, username, and group as deterministic values.
     85            ti.uid = 0
     86            ti.gid = 0
     87            ti.uname = ""
     88            ti.gname = ""
     89 
     90            # Set mtime to a constant value.
     91            ti.mtime = DEFAULT_MTIME
     92 
     93            ti.size = f.size()
     94            # tarfile wants to pass a size argument to read(). So just
     95            # wrap/buffer in a proper file object interface.
     96            tf.addfile(ti, f.open())
     97 
     98 
     99 def create_tar_gz_from_files(fp, files, filename=None, compresslevel=9):
    100    """Create a tar.gz file deterministically from files.
    101 
    102    This is a glorified wrapper around ``create_tar_from_files`` that
    103    adds gzip compression.
    104 
    105    The passed file handle should be opened for writing in binary mode.
    106    When the function returns, all data has been written to the handle.
    107    """
    108    # Offset 3-7 in the gzip header contains an mtime. Pin it to a known
    109    # value so output is deterministic.
    110    gf = gzip.GzipFile(
    111        filename=filename or "",
    112        mode="wb",
    113        fileobj=fp,
    114        compresslevel=compresslevel,
    115        mtime=DEFAULT_MTIME,
    116    )
    117    with gf:
    118        create_tar_from_files(gf, files)
    119 
    120 
    121 def create_tar_zst_from_files(fp, files, filename=None, compresslevel=9, threads=1):
    122    """Create a tar.zst file deterministically from files.
    123 
    124    This is a glorified wrapper around ``create_tar_from_files`` that
    125    adds zstandard compression.
    126 
    127    The passed file handle should be opened for writing in binary mode.
    128    When the function returns, all data has been written to the handle.
    129    """
    130    import zstandard
    131 
    132    cctx = zstandard.ZstdCompressor(level=compresslevel, threads=threads)
    133    with cctx.stream_writer(writer=fp) as compressor:
    134        create_tar_from_files(compressor, files)
    135 
    136 
    137 class _BZ2Proxy:
    138    """File object that proxies writes to a bz2 compressor."""
    139 
    140    def __init__(self, fp, compresslevel=9):
    141        self.fp = fp
    142        self.compressor = bz2.BZ2Compressor(compresslevel)
    143        self.pos = 0
    144 
    145    def tell(self):
    146        return self.pos
    147 
    148    def write(self, data):
    149        data = self.compressor.compress(data)
    150        self.pos += len(data)
    151        self.fp.write(data)
    152 
    153    def close(self):
    154        data = self.compressor.flush()
    155        self.pos += len(data)
    156        self.fp.write(data)
    157 
    158 
    159 def create_tar_bz2_from_files(fp, files, compresslevel=9):
    160    """Create a tar.bz2 file deterministically from files.
    161 
    162    This is a glorified wrapper around ``create_tar_from_files`` that
    163    adds bzip2 compression.
    164 
    165    This function is similar to ``create_tar_gzip_from_files()``.
    166    """
    167    proxy = _BZ2Proxy(fp, compresslevel=compresslevel)
    168    create_tar_from_files(proxy, files)
    169    proxy.close()