tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

files.py (45469B)


      1 # This Source Code Form is subject to the terms of the Mozilla Public
      2 # License, v. 2.0. If a copy of the MPL was not distributed with this
      3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      4 
      5 import bisect
      6 import errno
      7 import inspect
      8 import json
      9 import os
     10 import platform
     11 import shutil
     12 import stat
     13 import subprocess
     14 import tempfile
     15 import uuid
     16 from collections import OrderedDict
     17 from io import BytesIO
     18 from itertools import chain, takewhile
     19 from pathlib import Path
     20 from tarfile import TarFile, TarInfo
     21 from tempfile import mkstemp
     22 
     23 import mozpack.path as mozpath
     24 from mozbuild import makeutil
     25 from mozbuild.nodeutil import package_setup
     26 from mozbuild.preprocessor import Preprocessor
     27 from mozbuild.util import FileAvoidWrite, ensure_unicode, memoize
     28 from mozpack.chrome.manifest import ManifestEntry, ManifestInterfaces
     29 from mozpack.errors import ErrorMessage, errors
     30 from mozpack.executables import elfhack, is_executable, may_elfhack, may_strip, strip
     31 from mozpack.mozjar import JarReader
     32 
     33 try:
     34    import hglib
     35 except ImportError:
     36    hglib = None
     37 
     38 
     39 # For clean builds, copying files on win32 using CopyFile through ctypes is
     40 # ~2x as fast as using shutil.copyfile.
     41 if platform.system() != "Windows":
     42    _copyfile = shutil.copyfile
     43 else:
     44    import ctypes
     45 
     46    _kernel32 = ctypes.windll.kernel32
     47    _CopyFileA = _kernel32.CopyFileA
     48    _CopyFileW = _kernel32.CopyFileW
     49 
     50    def _copyfile(src, dest):
     51        # False indicates `dest` should be overwritten if it exists already.
     52        if isinstance(src, str) and isinstance(dest, str):
     53            _CopyFileW(src, dest, False)
     54        elif isinstance(src, str) and isinstance(dest, str):
     55            _CopyFileA(src, dest, False)
     56        else:
     57            raise TypeError("mismatched path types!")
     58 
     59 
     60 # Helper function; ensures we always open files with the correct encoding when
     61 # opening them in text mode.
     62 def _open(path, mode="r"):
     63    if "b" not in mode:
     64        return open(path, mode, encoding="utf-8")
     65    return open(path, mode)
     66 
     67 
     68 class Dest:
     69    """
     70    Helper interface for BaseFile.copy. The interface works as follows:
     71      - read() and write() can be used to sequentially read/write from the underlying file.
     72      - a call to read() after a write() will re-open the underlying file and read from it.
     73      - a call to write() after a read() will re-open the underlying file, emptying it, and write to it.
     74    """
     75 
     76    def __init__(self, path):
     77        self.file = None
     78        self.mode = None
     79        self.path = ensure_unicode(path)
     80 
     81    @property
     82    def name(self):
     83        return self.path
     84 
     85    def read(self, length=-1):
     86        if self.mode != "r":
     87            self.file = _open(self.path, mode="rb")
     88            self.mode = "r"
     89        return self.file.read(length)
     90 
     91    def write(self, data):
     92        if self.mode != "w":
     93            self.file = _open(self.path, mode="wb")
     94            self.mode = "w"
     95        if isinstance(data, str):
     96            data = data.encode()
     97        return self.file.write(data)
     98 
     99    def exists(self):
    100        return os.path.exists(self.path)
    101 
    102    def close(self):
    103        if self.mode:
    104            self.mode = None
    105            self.file.close()
    106            self.file = None
    107 
    108 
    109 class BaseFile:
    110    """
    111    Base interface and helper for file copying. Derived class may implement
    112    their own copy function, or rely on BaseFile.copy using the open() member
    113    function and/or the path property.
    114    """
    115 
    116    @staticmethod
    117    def is_older(first, second):
    118        """
    119        Compares the modification time of two files, and returns whether the
    120        ``first`` file is older than the ``second`` file.
    121        """
    122        # os.path.getmtime returns a result in seconds with precision up to
    123        # the microsecond. But microsecond is too precise because
    124        # shutil.copystat only copies milliseconds, and seconds is not
    125        # enough precision.
    126        return int(os.path.getmtime(first) * 1000) <= int(
    127            os.path.getmtime(second) * 1000
    128        )
    129 
    130    @staticmethod
    131    def any_newer(dest, inputs):
    132        """
    133        Compares the modification time of ``dest`` to multiple input files, and
    134        returns whether any of the ``inputs`` is newer (has a later mtime) than
    135        ``dest``.
    136        """
    137        # os.path.getmtime returns a result in seconds with precision up to
    138        # the microsecond. But microsecond is too precise because
    139        # shutil.copystat only copies milliseconds, and seconds is not
    140        # enough precision.
    141        dest_mtime = int(os.path.getmtime(dest) * 1000)
    142        for input in inputs:
    143            try:
    144                src_mtime = int(os.path.getmtime(input) * 1000)
    145            except OSError as e:
    146                if e.errno == errno.ENOENT:
    147                    # If an input file was removed, we should update.
    148                    return True
    149                raise
    150            if dest_mtime < src_mtime:
    151                return True
    152        return False
    153 
    154    @staticmethod
    155    def normalize_mode(mode):
    156        # Normalize file mode:
    157        # - keep file type (e.g. S_IFREG)
    158        ret = stat.S_IFMT(mode)
    159        # - expand user read and execute permissions to everyone
    160        if mode & 0o0400:
    161            ret |= 0o0444
    162        if mode & 0o0100:
    163            ret |= 0o0111
    164        # - keep user write permissions
    165        if mode & 0o0200:
    166            ret |= 0o0200
    167        # - leave away sticky bit, setuid, setgid
    168        return ret
    169 
    170    def copy(self, dest, skip_if_older=True):
    171        """
    172        Copy the BaseFile content to the destination given as a string or a
    173        Dest instance. Avoids replacing existing files if the BaseFile content
    174        matches that of the destination, or in case of plain files, if the
    175        destination is newer than the original file. This latter behaviour is
    176        disabled when skip_if_older is False.
    177        Returns whether a copy was actually performed (True) or not (False).
    178        """
    179        if isinstance(dest, str):
    180            dest = Dest(dest)
    181        else:
    182            assert isinstance(dest, Dest)
    183 
    184        can_skip_content_check = False
    185        if not dest.exists():
    186            can_skip_content_check = True
    187        elif getattr(self, "path", None) and getattr(dest, "path", None):
    188            if skip_if_older and BaseFile.is_older(self.path, dest.path):
    189                return False
    190            elif os.path.getsize(self.path) != os.path.getsize(dest.path):
    191                can_skip_content_check = True
    192 
    193        if can_skip_content_check:
    194            if getattr(self, "path", None) and getattr(dest, "path", None):
    195                # The destination directory must exist, or CopyFile will fail.
    196                destdir = os.path.dirname(dest.path)
    197                os.makedirs(destdir, exist_ok=True)
    198                _copyfile(self.path, dest.path)
    199                shutil.copystat(self.path, dest.path)
    200            else:
    201                # Ensure the file is always created
    202                if not dest.exists():
    203                    dest.write(b"")
    204                shutil.copyfileobj(self.open(), dest)
    205            return True
    206 
    207        src = self.open()
    208        accumulated_src_content = []
    209        while True:
    210            dest_content = dest.read(32768)
    211            src_content = src.read(32768)
    212            accumulated_src_content.append(src_content)
    213            if len(dest_content) == len(src_content) == 0:
    214                break
    215            # If the read content differs between origin and destination,
    216            # write what was read up to now, and copy the remainder.
    217            if dest_content != src_content:
    218                dest.write(b"".join(accumulated_src_content))
    219                shutil.copyfileobj(src, dest)
    220                break
    221        if hasattr(self, "path") and hasattr(dest, "path"):
    222            shutil.copystat(self.path, dest.path)
    223        return True
    224 
    225    def open(self):
    226        """
    227        Return a file-like object allowing to read() the content of the
    228        associated file. This is meant to be overloaded in subclasses to return
    229        a custom file-like object.
    230        """
    231        assert self.path is not None
    232        return open(self.path, "rb")
    233 
    234    def read(self):
    235        raise NotImplementedError("BaseFile.read() not implemented. Bug 1170329.")
    236 
    237    def size(self):
    238        """Returns size of the entry.
    239 
    240        Derived classes are highly encouraged to override this with a more
    241        optimal implementation.
    242        """
    243        return len(self.read())
    244 
    245    @property
    246    def mode(self):
    247        """
    248        Return the file's unix mode, or None if it has no meaning.
    249        """
    250        return None
    251 
    252    def inputs(self):
    253        """
    254        Return an iterable of the input file paths that impact this output file.
    255        """
    256        raise NotImplementedError("BaseFile.inputs() not implemented.")
    257 
    258 
    259 class File(BaseFile):
    260    """
    261    File class for plain files.
    262    """
    263 
    264    def __init__(self, path):
    265        self.path = ensure_unicode(path)
    266 
    267    @property
    268    def mode(self):
    269        """
    270        Return the file's unix mode, as returned by os.stat().st_mode.
    271        """
    272        if platform.system() == "Windows":
    273            return None
    274        assert self.path is not None
    275        mode = os.stat(self.path).st_mode
    276        return self.normalize_mode(mode)
    277 
    278    def read(self):
    279        """Return the contents of the file."""
    280        with open(self.path, "rb") as fh:
    281            return fh.read()
    282 
    283    def size(self):
    284        return os.stat(self.path).st_size
    285 
    286    def inputs(self):
    287        return (self.path,)
    288 
    289 
    290 class ExecutableFile(File):
    291    """
    292    File class for executable and library files on OS/2, OS/X and ELF systems.
    293    (see mozpack.executables.is_executable documentation).
    294    """
    295 
    296    def __init__(self, path):
    297        File.__init__(self, path)
    298 
    299    def copy(self, dest, skip_if_older=True):
    300        real_dest = dest
    301        if not isinstance(dest, str):
    302            fd, dest = mkstemp()
    303            os.close(fd)
    304            os.remove(dest)
    305        assert isinstance(dest, str)
    306        # If File.copy didn't actually copy because dest is newer, check the
    307        # file sizes. If dest is smaller, it means it is already stripped and
    308        # elfhacked, so we can skip.
    309        if not File.copy(self, dest, skip_if_older) and os.path.getsize(
    310            self.path
    311        ) > os.path.getsize(dest):
    312            return False
    313        try:
    314            if may_strip(dest):
    315                strip(dest)
    316            if may_elfhack(dest):
    317                elfhack(dest)
    318        except ErrorMessage:
    319            os.remove(dest)
    320            raise
    321 
    322        if real_dest != dest:
    323            f = File(dest)
    324            ret = f.copy(real_dest, skip_if_older)
    325            os.remove(dest)
    326            return ret
    327        return True
    328 
    329 
class AbsoluteSymlinkFile(File):
    """File class that is copied by symlinking (if available).

    This class only works if the target path is absolute.
    """

    def __init__(self, path):
        """Store ``path`` as the symlink target.

        Raises ValueError when ``path`` is not absolute.
        """
        if not os.path.isabs(path):
            raise ValueError("Symlink target not absolute: %s" % path)

        File.__init__(self, path)

    def copy(self, dest, skip_if_older=True):
        """Make ``dest`` (a string path) a symlink to this file's path.

        Falls back to a regular File.copy when ``os.symlink`` is missing or
        when creating a symlink fails with OSError. Returns False when
        ``dest`` is already a symlink to the right target, True when a
        symlink was created or replaced, and the result of File.copy when
        falling back.
        """
        assert isinstance(dest, str)

        # The logic in this function is complicated by the fact that symlinks
        # aren't universally supported. So, where symlinks aren't supported, we
        # fall back to file copying. Keep in mind that symlink support is
        # per-filesystem, not per-OS.

        # Handle the simple case where symlinks are definitely not supported by
        # falling back to file copy.
        if not hasattr(os, "symlink"):
            return File.copy(self, dest, skip_if_older=skip_if_older)

        # Always verify the symlink target path exists.
        if not os.path.exists(self.path):
            errors.fatal("Symlink target path does not exist: %s" % self.path)

        # lstat result for dest; stays None when dest does not exist.
        st = None

        try:
            st = os.lstat(dest)
        except OSError as ose:
            # Only a missing destination is expected here.
            if ose.errno != errno.ENOENT:
                raise

        # If the dest is a symlink pointing to us, we have nothing to do.
        # If it's the wrong symlink, the filesystem must support symlinks,
        # so we replace with a proper symlink.
        if st and stat.S_ISLNK(st.st_mode):
            link = os.readlink(dest)
            if link == self.path:
                return False

            os.remove(dest)
            os.symlink(self.path, dest)
            return True

        # If the destination doesn't exist, we try to create a symlink. If that
        # fails, we fall back to copy code.
        if not st:
            try:
                os.symlink(self.path, dest)
                return True
            except OSError:
                return File.copy(self, dest, skip_if_older=skip_if_older)

        # Now the complicated part. If the destination exists, we could be
        # replacing a file with a symlink. Or, the filesystem may not support
        # symlinks. We want to minimize I/O overhead for performance reasons,
        # so we keep the existing destination file around as long as possible.
        # A lot of the system calls would be eliminated if we cached whether
        # symlinks are supported. However, even if we performed a single
        # up-front test of whether the root of the destination directory
        # supports symlinks, there's no guarantee that all operations for that
        # dest (or source) would be on the same filesystem and would support
        # symlinks.
        #
        # Our strategy is to attempt to create a new symlink with a random
        # name. If that fails, we fall back to copy mode. If that works, we
        # remove the old destination and move the newly-created symlink into
        # its place.

        # The temporary symlink is created next to dest, in the same directory.
        temp_dest = os.path.join(os.path.dirname(dest), str(uuid.uuid4()))
        try:
            os.symlink(self.path, temp_dest)
        # TODO Figure out exactly how symlink creation fails and only trap
        # that.
        except OSError:
            return File.copy(self, dest, skip_if_older=skip_if_older)

        # If removing the original file fails, don't forget to clean up the
        # temporary symlink.
        try:
            os.remove(dest)
        except OSError:
            os.remove(temp_dest)
            raise

        os.rename(temp_dest, dest)
        return True
    422 
    423 
    424 class HardlinkFile(File):
    425    """File class that is copied by hard linking (if available)
    426 
    427    This is similar to the AbsoluteSymlinkFile, but with hard links. The symlink
    428    implementation requires paths to be absolute, because they are resolved at
    429    read time, which makes relative paths messy. Hard links resolve paths at
    430    link-creation time, so relative paths are fine.
    431    """
    432 
    433    def copy(self, dest, skip_if_older=True):
    434        assert isinstance(dest, str)
    435 
    436        if not hasattr(os, "link"):
    437            return super().copy(dest, skip_if_older=skip_if_older)
    438 
    439        try:
    440            path_st = os.stat(self.path)
    441        except OSError as e:
    442            if e.errno == errno.ENOENT:
    443                errors.fatal("Hard link target path does not exist: %s" % self.path)
    444            else:
    445                raise
    446 
    447        st = None
    448        try:
    449            st = os.lstat(dest)
    450        except OSError as e:
    451            if e.errno != errno.ENOENT:
    452                raise
    453 
    454        if st:
    455            # The dest already points to the right place.
    456            if st.st_dev == path_st.st_dev and st.st_ino == path_st.st_ino:
    457                return False
    458            # The dest exists and it points to the wrong place
    459            os.remove(dest)
    460 
    461        # At this point, either the dest used to exist and we just deleted it,
    462        # or it never existed. We can now safely create the hard link.
    463        try:
    464            os.link(self.path, dest)
    465        except OSError:
    466            # If we can't hard link, fall back to copying
    467            return super().copy(dest, skip_if_older=skip_if_older)
    468        return True
    469 
    470 
    471 class ExistingFile(BaseFile):
    472    """
    473    File class that represents a file that may exist but whose content comes
    474    from elsewhere.
    475 
    476    This purpose of this class is to account for files that are installed via
    477    external means. It is typically only used in manifests or in registries to
    478    account for files.
    479 
    480    When asked to copy, this class does nothing because nothing is known about
    481    the source file/data.
    482 
    483    Instances of this class come in two flavors: required and optional. If an
    484    existing file is required, it must exist during copy() or an error is
    485    raised.
    486    """
    487 
    488    def __init__(self, required):
    489        self.required = required
    490 
    491    def copy(self, dest, skip_if_older=True):
    492        if isinstance(dest, str):
    493            dest = Dest(dest)
    494        else:
    495            assert isinstance(dest, Dest)
    496 
    497        if not self.required:
    498            return
    499 
    500        if not dest.exists():
    501            errors.fatal("Required existing file doesn't exist: %s" % dest.path)
    502 
    503    def inputs(self):
    504        return ()
    505 
    506 
class PreprocessedFile(BaseFile):
    """
    File class for a file that is preprocessed. PreprocessedFile.copy() runs
    the preprocessor on the file to create the output.
    """

    def __init__(
        self,
        path,
        depfile_path,
        marker,
        defines,
        extra_depends=None,
        silence_missing_directive_warnings=False,
    ):
        # Path of the file handed to the preprocessor.
        self.path = ensure_unicode(path)
        # Path of the make-style dependency file read and (re)written by
        # copy().
        # NOTE(review): copy() tests `if self.depfile`, so a falsy value seems
        # to be supported; confirm ensure_unicode() accepts it.
        self.depfile = ensure_unicode(depfile_path)
        # marker and defines are passed as-is to the Preprocessor constructor.
        self.marker = marker
        self.defines = defines
        # Additional dependencies taken into account when deciding whether
        # the output is up to date.
        self.extra_depends = list(extra_depends or [])
        self.silence_missing_directive_warnings = silence_missing_directive_warnings

    def inputs(self):
        """
        Run the preprocessor on the file, discarding its output, and return
        the includes the preprocessor recorded.
        """
        pp = Preprocessor(defines=self.defines, marker=self.marker)
        pp.setSilenceDirectiveWarnings(self.silence_missing_directive_warnings)

        with _open(self.path, "r") as input:
            with _open(os.devnull, "w") as output:
                pp.processFile(input=input, output=output)

        # This always yields at least self.path.
        return pp.includes

    def copy(self, dest, skip_if_older=True):
        """
        Invokes the preprocessor to create the destination file.

        ``dest`` is a string or a Dest instance. Returns False when the
        preprocessor run was skipped because the destination is up to date
        with respect to its dependencies, and True after the preprocessor
        ran.
        """
        if isinstance(dest, str):
            dest = Dest(dest)
        else:
            assert isinstance(dest, Dest)

        # We have to account for the case where the destination exists and is a
        # symlink to something. Since we know the preprocessor is certainly not
        # going to create a symlink, we can just remove the existing one. If the
        # destination is not a symlink, we leave it alone, since we're going to
        # overwrite its contents anyway.
        # If symlinks aren't supported at all, we can skip this step.
        if hasattr(os, "symlink") and os.path.islink(dest.path):
            os.remove(dest.path)

        # Dependencies to compare against the destination's mtime.
        pp_deps = set(self.extra_depends)

        # If a dependency file was specified, and it exists, add any
        # dependencies from that file to our list.
        if self.depfile and os.path.exists(self.depfile):
            target = mozpath.normpath(dest.name)
            with _open(self.depfile, "rt") as fileobj:
                for rule in makeutil.read_dep_makefile(fileobj):
                    # Only rules that list the destination as a target matter.
                    if target in rule.targets():
                        pp_deps.update(rule.dependencies())

        skip = False
        if dest.exists() and skip_if_older:
            # If a dependency file was specified, and it doesn't exist,
            # assume that the preprocessor needs to be rerun. That will
            # regenerate the dependency file.
            if self.depfile and not os.path.exists(self.depfile):
                skip = False
            else:
                skip = not BaseFile.any_newer(dest.path, pp_deps)

        if skip:
            return False

        # The dependency file is only written when one was specified.
        deps_out = None
        if self.depfile:
            deps_out = FileAvoidWrite(self.depfile)
        pp = Preprocessor(defines=self.defines, marker=self.marker)
        pp.setSilenceDirectiveWarnings(self.silence_missing_directive_warnings)

        with _open(self.path, "r") as input:
            pp.processFile(input=input, output=dest, depfile=deps_out)

        dest.close()
        if self.depfile:
            deps_out.close()

        return True
    596 
    597 
    598 class GeneratedFile(BaseFile):
    599    """
    600    File class for content with no previous existence on the filesystem.
    601    """
    602 
    603    def __init__(self, content):
    604        self._content = content
    605 
    606    @property
    607    def content(self):
    608        if inspect.isfunction(self._content):
    609            self._content = self._content()
    610        if isinstance(self._content, str):
    611            return self._content.encode()
    612        return self._content
    613 
    614    @content.setter
    615    def content(self, content):
    616        self._content = content
    617 
    618    def open(self):
    619        return BytesIO(self.content)
    620 
    621    def read(self):
    622        return self.content
    623 
    624    def size(self):
    625        return len(self.content)
    626 
    627    def inputs(self):
    628        return ()
    629 
    630 
    631 class DeflatedFile(BaseFile):
    632    """
    633    File class for members of a jar archive. DeflatedFile.copy() effectively
    634    extracts the file from the jar archive.
    635    """
    636 
    637    def __init__(self, file):
    638        from mozpack.mozjar import JarFileReader
    639 
    640        assert isinstance(file, JarFileReader)
    641        self.file = file
    642 
    643    def open(self):
    644        self.file.seek(0)
    645        return self.file
    646 
    647 
    648 class ExtractedTarFile(GeneratedFile):
    649    """
    650    File class for members of a tar archive. Contents of the underlying file
    651    are extracted immediately and stored in memory.
    652    """
    653 
    654    def __init__(self, tar, info):
    655        assert isinstance(info, TarInfo)
    656        assert isinstance(tar, TarFile)
    657        GeneratedFile.__init__(self, tar.extractfile(info).read())
    658        self._unix_mode = self.normalize_mode(info.mode)
    659 
    660    @property
    661    def mode(self):
    662        return self._unix_mode
    663 
    664    def read(self):
    665        return self.content
    666 
    667 
    668 class ManifestFile(BaseFile):
    669    """
    670    File class for a manifest file. It takes individual manifest entries (using
    671    the add() and remove() member functions), and adjusts them to be relative
    672    to the base path for the manifest, given at creation.
    673    Example:
    674        There is a manifest entry "content foobar foobar/content/" relative
    675        to "foobar/chrome". When packaging, the entry will be stored in
    676        jar:foobar/omni.ja!/chrome/chrome.manifest, which means the entry
    677        will have to be relative to "chrome" instead of "foobar/chrome". This
    678        doesn't really matter when serializing the entry, since this base path
    679        is not written out, but it matters when moving the entry at the same
    680        time, e.g. to jar:foobar/omni.ja!/chrome.manifest, which we don't do
    681        currently but could in the future.
    682    """
    683 
    684    def __init__(self, base, entries=None):
    685        self._base = base
    686        self._entries = []
    687        self._interfaces = []
    688        for e in entries or []:
    689            self.add(e)
    690 
    691    def add(self, entry):
    692        """
    693        Add the given entry to the manifest. Entries are rebased at open() time
    694        instead of add() time so that they can be more easily remove()d.
    695        """
    696        assert isinstance(entry, ManifestEntry)
    697        if isinstance(entry, ManifestInterfaces):
    698            self._interfaces.append(entry)
    699        else:
    700            self._entries.append(entry)
    701 
    702    def remove(self, entry):
    703        """
    704        Remove the given entry from the manifest.
    705        """
    706        assert isinstance(entry, ManifestEntry)
    707        if isinstance(entry, ManifestInterfaces):
    708            self._interfaces.remove(entry)
    709        else:
    710            self._entries.remove(entry)
    711 
    712    def open(self):
    713        """
    714        Return a file-like object allowing to read() the serialized content of
    715        the manifest.
    716        """
    717        content = "".join(
    718            "%s\n" % e.rebase(self._base)
    719            for e in chain(self._entries, self._interfaces)
    720        )
    721        return BytesIO(content.encode())
    722 
    723    def __iter__(self):
    724        """
    725        Iterate over entries in the manifest file.
    726        """
    727        return chain(self._entries, self._interfaces)
    728 
    729    def isempty(self):
    730        """
    731        Return whether there are manifest entries to write
    732        """
    733        return len(self._entries) + len(self._interfaces) == 0
    734 
    735 
    736 class MinifiedCommentStripped(BaseFile):
    737    """
    738    File class for content minified by stripping comments. This wraps around a
    739    BaseFile instance, and removes lines starting with a # from its content.
    740    """
    741 
    742    def __init__(self, file):
    743        assert isinstance(file, BaseFile)
    744        self._file = file
    745 
    746    def open(self):
    747        """
    748        Return a file-like object allowing to read() the minified content of
    749        the underlying file.
    750        """
    751        content = "".join(
    752            l
    753            for l in [s.decode() for s in self._file.open().readlines()]
    754            if not l.startswith("#")
    755        )
    756        return BytesIO(content.encode())
    757 
    758 
    759 class MinifiedJavaScript(BaseFile):
    760    """
    761    Minify JavaScript files using Terser while preserving
    762    class and function names for better debugging.
    763    """
    764 
    765    TERSER_CONFIG = {
    766        "parse": {
    767            "ecma": 2020,
    768            "module": True,
    769        },
    770        "compress": {
    771            "unused": True,
    772            "passes": 3,
    773            "ecma": 2020,
    774        },
    775        "mangle": {
    776            "keep_classnames": True,  # Preserve class names
    777            "keep_fnames": True,  # Preserve function names
    778        },
    779        "format": {
    780            "comments": "/@lic|webpackIgnore|@vite-ignore/i",
    781            "ascii_only": True,
    782            "ecma": 2020,
    783        },
    784        "sourceMap": False,
    785    }
    786 
    787    def __init__(self, file, filepath):
    788        """
    789        Initialize with a BaseFile instance to minify.
    790        """
    791        self._file = file
    792        self._filepath = filepath
    793 
    794    def _minify_with_terser(self, source_content):
    795        """
    796        Minify JavaScript content using Terser
    797        """
    798        if len(source_content) == 0:
    799            return source_content
    800 
    801        import buildconfig
    802 
    803        node_path = buildconfig.substs.get("NODEJS")
    804        if not node_path:
    805            errors.fatal("NODEJS not found in build configuration")
    806 
    807        topsrcdir = Path(buildconfig.topsrcdir)
    808 
    809        if os.environ.get("MOZ_AUTOMATION"):
    810            fetches_terser = (
    811                Path(os.environ["MOZ_FETCHES_DIR"])
    812                / "terser"
    813                / "node_modules"
    814                / "terser"
    815                / "bin"
    816                / "terser"
    817            )
    818            if fetches_terser.exists():
    819                terser_path = fetches_terser
    820            else:
    821                errors.fatal(f"Terser toolchain not found at {fetches_terser}.")
    822        else:
    823            terser_dir = topsrcdir / "tools" / "terser"
    824            terser_path = terser_dir / "node_modules" / "terser" / "bin" / "terser"
    825 
    826            if not terser_path.exists():
    827                # Automatically set up node_modules if terser is not found
    828                package_setup(str(terser_dir), "terser")
    829 
    830                # Verify that terser is now available after setup
    831                if not terser_path.exists():
    832                    errors.fatal(
    833                        f"Terser is required for JavaScript minification but could not be installed at {terser_path}. "
    834                        "Package setup may have failed."
    835                    )
    836 
    837        terser_cmd = [node_path, str(terser_path)]
    838 
    839        with tempfile.TemporaryDirectory() as temp_dir:
    840            temp_path = Path(temp_dir)
    841            config_path = temp_path / "terser_config.json"
    842            source_path = temp_path / "source.js"
    843 
    844            config_path.write_text(json.dumps(self.TERSER_CONFIG), encoding="utf-8")
    845            source_path.write_bytes(source_content)
    846 
    847            try:
    848                result = subprocess.run(
    849                    terser_cmd
    850                    + [
    851                        source_path,
    852                        "--config-file",
    853                        config_path,
    854                    ],
    855                    capture_output=True,
    856                    check=False,
    857                )
    858 
    859                if result.returncode == 0:
    860                    return result.stdout
    861                else:
    862                    error_msg = result.stderr.decode("utf-8", errors="ignore")
    863                    errors.error(
    864                        f"Terser minification failed for {self._filepath}: {error_msg}"
    865                    )
    866                    return source_content
    867 
    868            except subprocess.SubprocessError as e:
    869                errors.error(f"Error running Terser for {self._filepath}: {e}")
    870                return source_content
    871 
    872    def open(self):
    873        """
    874        Return a file-like object with the minified content.
    875        """
    876        source_content = self._file.open().read()
    877        minified = self._minify_with_terser(source_content)
    878        return BytesIO(minified)
    879 
    880 
    881 class BaseFinder:
    882    def __init__(self, base, minify=False, minify_js=False, minify_pdfjs=False):
    883        """
    884        Initializes the instance with a reference base directory.
    885 
    886        The optional minify argument specifies whether minification of code
    887        should occur. minify_js is an additional option to control minification
    888        of JavaScript. It requires minify to be True. minify_pdfjs controls
    889        minification of PDF.js files independently.
    890        """
    891        if minify_js and not minify:
    892            raise ValueError("minify_js requires minify.")
    893 
    894        self.base = mozpath.normsep(base)
    895        self._minify = minify
    896        self._minify_js = minify_js
    897        self._minify_pdfjs = minify_pdfjs
    898 
    899    def find(self, pattern):
    900        """
    901        Yield path, BaseFile_instance pairs for all files under the base
    902        directory and its subdirectories that match the given pattern. See the
    903        mozpack.path.match documentation for a description of the handled
    904        patterns.
    905        """
    906        while pattern.startswith("/"):
    907            pattern = pattern[1:]
    908        for p, f in self._find(pattern):
    909            yield p, self._minify_file(p, f)
    910 
    911    def get(self, path):
    912        """Obtain a single file.
    913 
    914        Where ``find`` is tailored towards matching multiple files, this method
    915        is used for retrieving a single file. Use this method when performance
    916        is critical.
    917 
    918        Returns a ``BaseFile`` if at most one file exists or ``None`` otherwise.
    919        """
    920        files = list(self.find(path))
    921        if len(files) != 1:
    922            return None
    923        return files[0][1]
    924 
    925    def __iter__(self):
    926        """
    927        Iterates over all files under the base directory (excluding files
    928        starting with a '.' and files at any level under a directory starting
    929        with a '.').
    930            for path, file in finder:
    931                ...
    932        """
    933        return self.find("")
    934 
    935    def __contains__(self, pattern):
    936        raise RuntimeError(
    937            "'in' operator forbidden for %s. Use contains()." % self.__class__.__name__
    938        )
    939 
    940    def contains(self, pattern):
    941        """
    942        Return whether some files under the base directory match the given
    943        pattern. See the mozpack.path.match documentation for a description of
    944        the handled patterns.
    945        """
    946        return any(self.find(pattern))
    947 
    948    def _minify_file(self, path, file):
    949        """
    950        Return an appropriate MinifiedSomething wrapper for the given BaseFile
    951        instance (file), according to the file type (determined by the given
    952        path), if the FileFinder was created with minification enabled.
    953        Otherwise, just return the given BaseFile instance.
    954        """
    955        if not self._minify or isinstance(file, ExecutableFile):
    956            return file
    957 
    958        if path.endswith((".ftl", ".properties")):
    959            return MinifiedCommentStripped(file)
    960 
    961        if path.endswith((".js", ".jsm", ".mjs")):
    962            file_path = mozpath.normsep(path)
    963            filename = mozpath.basename(file_path)
    964            # Don't minify prefs files because they use a custom parser that's stricter than JS
    965            if filename.endswith("prefs.js") or "/defaults/pref" in file_path:
    966                return file
    967            # PDF.js files are minified based on the minify_pdfjs flag (for now)
    968            if "pdfjs" in file_path and self._minify_pdfjs:
    969                return MinifiedJavaScript(file, path)
    970            elif self._minify_js:
    971                return MinifiedJavaScript(file, path)
    972 
    973        return file
    974 
    975    def _find_helper(self, pattern, files, file_getter):
    976        """Generic implementation of _find.
    977 
    978        A few *Finder implementations share logic for returning results.
    979        This function implements the custom logic.
    980 
    981        The ``file_getter`` argument is a callable that receives a path
    982        that is known to exist. The callable should return a ``BaseFile``
    983        instance.
    984        """
    985        if "*" in pattern:
    986            for p in files:
    987                if mozpath.match(p, pattern):
    988                    yield p, file_getter(p)
    989        elif pattern == "":
    990            for p in files:
    991                yield p, file_getter(p)
    992        elif pattern in files:
    993            yield pattern, file_getter(pattern)
    994        else:
    995            for p in files:
    996                if mozpath.basedir(p, [pattern]) == pattern:
    997                    yield p, file_getter(p)
    998 
    999 
   1000 class FileFinder(BaseFinder):
   1001    """
   1002    Helper to get appropriate BaseFile instances from the file system.
   1003    """
   1004 
   1005    def __init__(
   1006        self,
   1007        base,
   1008        find_executables=False,
   1009        ignore=(),
   1010        ignore_broken_symlinks=False,
   1011        find_dotfiles=False,
   1012        **kargs,
   1013    ):
   1014        """
   1015        Create a FileFinder for files under the given base directory.
   1016 
   1017        The find_executables argument determines whether the finder needs to
   1018        try to guess whether files are executables. Disabling this guessing
   1019        when not necessary can speed up the finder significantly.
   1020 
   1021        ``ignore`` accepts an iterable of patterns to ignore. Entries are
   1022        strings that match paths relative to ``base`` using
   1023        ``mozpath.match()``. This means if an entry corresponds
   1024        to a directory, all files under that directory will be ignored. If
   1025        an entry corresponds to a file, that particular file will be ignored.
   1026        ``ignore_broken_symlinks`` is passed by the packager to work around an
   1027        issue with the build system not cleaning up stale files in some common
   1028        cases. See bug 1297381.
   1029        """
   1030        BaseFinder.__init__(self, base, **kargs)
   1031        self.find_dotfiles = find_dotfiles
   1032        self.find_executables = find_executables
   1033        self.ignore = tuple(mozpath.normsep(path) for path in ignore)
   1034        self.ignore_broken_symlinks = ignore_broken_symlinks
   1035 
   1036    def _find(self, pattern):
   1037        """
   1038        Actual implementation of FileFinder.find(), dispatching to specialized
   1039        member functions depending on what kind of pattern was given.
   1040        Note all files with a name starting with a '.' are ignored when
   1041        scanning directories, but are not ignored when explicitely requested.
   1042        """
   1043        if "*" in pattern:
   1044            return self._find_glob("", mozpath.split(pattern))
   1045        elif os.path.isdir(os.path.join(self.base, pattern)):
   1046            return self._find_dir(pattern)
   1047        else:
   1048            f = self.get(pattern)
   1049            return ((pattern, f),) if f else ()
   1050 
   1051    def _find_dir(self, path):
   1052        """
   1053        Actual implementation of FileFinder.find() when the given pattern
   1054        corresponds to an existing directory under the base directory.
   1055        Ignores file names starting with a '.' under the given path. If the
   1056        path itself has leafs starting with a '.', they are not ignored.
   1057        """
   1058        for p in self.ignore:
   1059            if mozpath.match(path, p):
   1060                return
   1061 
   1062        # The sorted makes the output idempotent. Otherwise, we are
   1063        # likely dependent on filesystem implementation details, such as
   1064        # inode ordering.
   1065        for p in sorted(os.listdir(os.path.join(self.base, path))):
   1066            if p.startswith("."):
   1067                if p in (".", ".."):
   1068                    continue
   1069                if not self.find_dotfiles:
   1070                    continue
   1071            yield from self._find(mozpath.join(path, p))
   1072 
   1073    def get(self, path):
   1074        srcpath = os.path.join(self.base, path)
   1075        if not os.path.lexists(srcpath):
   1076            return None
   1077 
   1078        if self.ignore_broken_symlinks and not os.path.exists(srcpath):
   1079            return None
   1080 
   1081        for p in self.ignore:
   1082            if mozpath.match(path, p):
   1083                return None
   1084 
   1085        if self.find_executables and is_executable(srcpath):
   1086            return ExecutableFile(srcpath)
   1087        else:
   1088            return File(srcpath)
   1089 
   1090    def _find_glob(self, base, pattern):
   1091        """
   1092        Actual implementation of FileFinder.find() when the given pattern
   1093        contains globbing patterns ('*' or '**'). This is meant to be an
   1094        equivalent of:
   1095            for p, f in self:
   1096                if mozpath.match(p, pattern):
   1097                    yield p, f
   1098        but avoids scanning the entire tree.
   1099        """
   1100        if not pattern:
   1101            for p, f in self._find(base):
   1102                yield p, f
   1103        elif pattern[0] == "**":
   1104            for p, f in self._find(base):
   1105                if mozpath.match(p, mozpath.join(*pattern)):
   1106                    yield p, f
   1107        elif "*" in pattern[0]:
   1108            if not os.path.exists(os.path.join(self.base, base)):
   1109                return
   1110 
   1111            for p in self.ignore:
   1112                if mozpath.match(base, p):
   1113                    return
   1114 
   1115            # See above comment w.r.t. sorted() and idempotent behavior.
   1116            for p in sorted(os.listdir(os.path.join(self.base, base))):
   1117                if p.startswith(".") and not pattern[0].startswith("."):
   1118                    continue
   1119                if mozpath.match(p, pattern[0]):
   1120                    for p_, f in self._find_glob(mozpath.join(base, p), pattern[1:]):
   1121                        yield p_, f
   1122        else:
   1123            for p, f in self._find_glob(mozpath.join(base, pattern[0]), pattern[1:]):
   1124                yield p, f
   1125 
   1126 
   1127 class JarFinder(BaseFinder):
   1128    """
   1129    Helper to get appropriate DeflatedFile instances from a JarReader.
   1130    """
   1131 
   1132    def __init__(self, base, reader, **kargs):
   1133        """
   1134        Create a JarFinder for files in the given JarReader. The base argument
   1135        is used as an indication of the Jar file location.
   1136        """
   1137        assert isinstance(reader, JarReader)
   1138        BaseFinder.__init__(self, base, **kargs)
   1139        self._files = OrderedDict((f.filename, f) for f in reader)
   1140 
   1141    def _find(self, pattern):
   1142        """
   1143        Actual implementation of JarFinder.find(), dispatching to specialized
   1144        member functions depending on what kind of pattern was given.
   1145        """
   1146        return self._find_helper(
   1147            pattern, self._files, lambda x: DeflatedFile(self._files[x])
   1148        )
   1149 
   1150 
   1151 class TarFinder(BaseFinder):
   1152    """
   1153    Helper to get files from a TarFile.
   1154    """
   1155 
   1156    def __init__(self, base, tar, **kargs):
   1157        """
   1158        Create a TarFinder for files in the given TarFile. The base argument
   1159        is used as an indication of the Tar file location.
   1160        """
   1161        assert isinstance(tar, TarFile)
   1162        self._tar = tar
   1163        BaseFinder.__init__(self, base, **kargs)
   1164        self._files = OrderedDict((f.name, f) for f in tar if f.isfile())
   1165 
   1166    def _find(self, pattern):
   1167        """
   1168        Actual implementation of TarFinder.find(), dispatching to specialized
   1169        member functions depending on what kind of pattern was given.
   1170        """
   1171        return self._find_helper(
   1172            pattern, self._files, lambda x: ExtractedTarFile(self._tar, self._files[x])
   1173        )
   1174 
   1175 
   1176 class ComposedFinder(BaseFinder):
   1177    """
   1178    Composes multiple File Finders in some sort of virtual file system.
   1179 
   1180    A ComposedFinder is initialized from a dictionary associating paths
   1181    to `*Finder instances.`
   1182 
   1183    Note this could be optimized to be smarter than getting all the files
   1184    in advance.
   1185    """
   1186 
   1187    def __init__(self, finders):
   1188        # Can't import globally, because of the dependency of mozpack.copier
   1189        # on this module.
   1190        from mozpack.copier import FileRegistry
   1191 
   1192        self.files = FileRegistry()
   1193 
   1194        for base, finder in sorted(finders.items()):
   1195            if self.files.contains(base):
   1196                self.files.remove(base)
   1197            for p, f in finder.find(""):
   1198                self.files.add(mozpath.join(base, p), f)
   1199 
   1200    def find(self, pattern):
   1201        for p in self.files.match(pattern):
   1202            yield p, self.files[p]
   1203 
   1204 
   1205 class MercurialFile(BaseFile):
   1206    """File class for holding data from Mercurial."""
   1207 
   1208    def __init__(self, client, rev, path):
   1209        self._content = client.cat([path.encode()], rev=rev.encode())
   1210 
   1211    def open(self):
   1212        return BytesIO(self._content)
   1213 
   1214    def read(self):
   1215        return self._content
   1216 
   1217 
   1218 class MercurialRevisionFinder(BaseFinder):
   1219    """A finder that operates on a specific Mercurial revision."""
   1220 
   1221    def __init__(self, repo, rev=".", recognize_repo_paths=False, **kwargs):
   1222        """Create a finder attached to a specific revision in a repository.
   1223 
   1224        If no revision is given, open the parent of the working directory.
   1225 
   1226        ``recognize_repo_paths`` will enable a mode where ``.get()`` will
   1227        recognize full paths that include the repo's path. Typically Finder
   1228        instances are "bound" to a base directory and paths are relative to
   1229        that directory. This mode changes that. When this mode is activated,
   1230        ``.find()`` will not work! This mode exists to support the moz.build
   1231        reader, which uses absolute paths instead of relative paths. The reader
   1232        should eventually be rewritten to use relative paths and this hack
   1233        should be removed (TODO bug 1171069).
   1234        """
   1235        if not hglib:
   1236            raise Exception("hglib package not found")
   1237 
   1238        super().__init__(base=repo, **kwargs)
   1239 
   1240        self._root = mozpath.normpath(repo).rstrip("/")
   1241        self._recognize_repo_paths = recognize_repo_paths
   1242 
   1243        # We change directories here otherwise we have to deal with relative
   1244        # paths.
   1245        oldcwd = os.getcwd()
   1246        os.chdir(self._root)
   1247        try:
   1248            self._client = hglib.open(path=repo, encoding=b"utf-8")
   1249        finally:
   1250            os.chdir(oldcwd)
   1251        self._rev = rev if rev is not None else "."
   1252        self._files = OrderedDict()
   1253 
   1254        # Immediately populate the list of files in the repo since nearly every
   1255        # operation requires this list.
   1256        out = self._client.rawcommand([
   1257            b"files",
   1258            b"--rev",
   1259            self._rev.encode(),
   1260        ])
   1261        for relpath in out.splitlines():
   1262            # Mercurial may use \ as path separator on Windows. So use
   1263            # normpath().
   1264            self._files[mozpath.normpath(relpath).decode()] = None
   1265 
   1266    def _find(self, pattern):
   1267        if self._recognize_repo_paths:
   1268            raise NotImplementedError("cannot use find with recognize_repo_path")
   1269 
   1270        return self._find_helper(pattern, self._files, self._get)
   1271 
   1272    def get(self, path):
   1273        path = mozpath.normpath(path)
   1274        if self._recognize_repo_paths:
   1275            if not path.startswith(self._root):
   1276                raise ValueError(
   1277                    "lookups in recognize_repo_paths mode must be "
   1278                    "prefixed with repo path: %s" % path
   1279                )
   1280            path = path[len(self._root) + 1 :]
   1281 
   1282        try:
   1283            return self._get(path)
   1284        except KeyError:
   1285            return None
   1286 
   1287    def _get(self, path):
   1288        # We lazy populate self._files because potentially creating tens of
   1289        # thousands of MercurialFile instances for every file in the repo is
   1290        # inefficient.
   1291        f = self._files[path]
   1292        if not f:
   1293            f = MercurialFile(self._client, self._rev, path)
   1294            self._files[path] = f
   1295 
   1296        return f
   1297 
   1298 
   1299 class FileListFinder(BaseFinder):
   1300    """Finder for a literal list of file names."""
   1301 
   1302    def __init__(self, files):
   1303        self._files = sorted(files)
   1304 
   1305    @memoize
   1306    def _match(self, pattern):
   1307        """Return a sorted list of all files matching the given pattern."""
   1308        # We don't use the utility _find_helper method because it's not tuned
   1309        # for performance in the way that we would like this class to be. That's
   1310        # a possible avenue for refactoring here.
   1311        ret = []
   1312        # We do this as an optimization to figure out where in the sorted list
   1313        # to search and where to stop searching.
   1314        components = pattern.split("/")
   1315        prefix = "/".join(takewhile(lambda s: "*" not in s, components))
   1316        start = bisect.bisect_left(self._files, prefix)
   1317        for i in range(start, len(self._files)):
   1318            f = self._files[i]
   1319            if not f.startswith(prefix):
   1320                break
   1321            # Skip hidden files while scanning.
   1322            if "/." in f[len(prefix) :]:
   1323                continue
   1324            if mozpath.match(f, pattern):
   1325                ret.append(f)
   1326        return ret
   1327 
   1328    def find(self, pattern):
   1329        pattern = pattern.strip("/")
   1330        for path in self._match(pattern):
   1331            yield path, File(path)