files.py (45469B)
1 # This Source Code Form is subject to the terms of the Mozilla Public 2 # License, v. 2.0. If a copy of the MPL was not distributed with this 3 # file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 5 import bisect 6 import errno 7 import inspect 8 import json 9 import os 10 import platform 11 import shutil 12 import stat 13 import subprocess 14 import tempfile 15 import uuid 16 from collections import OrderedDict 17 from io import BytesIO 18 from itertools import chain, takewhile 19 from pathlib import Path 20 from tarfile import TarFile, TarInfo 21 from tempfile import mkstemp 22 23 import mozpack.path as mozpath 24 from mozbuild import makeutil 25 from mozbuild.nodeutil import package_setup 26 from mozbuild.preprocessor import Preprocessor 27 from mozbuild.util import FileAvoidWrite, ensure_unicode, memoize 28 from mozpack.chrome.manifest import ManifestEntry, ManifestInterfaces 29 from mozpack.errors import ErrorMessage, errors 30 from mozpack.executables import elfhack, is_executable, may_elfhack, may_strip, strip 31 from mozpack.mozjar import JarReader 32 33 try: 34 import hglib 35 except ImportError: 36 hglib = None 37 38 39 # For clean builds, copying files on win32 using CopyFile through ctypes is 40 # ~2x as fast as using shutil.copyfile. 41 if platform.system() != "Windows": 42 _copyfile = shutil.copyfile 43 else: 44 import ctypes 45 46 _kernel32 = ctypes.windll.kernel32 47 _CopyFileA = _kernel32.CopyFileA 48 _CopyFileW = _kernel32.CopyFileW 49 50 def _copyfile(src, dest): 51 # False indicates `dest` should be overwritten if it exists already. 52 if isinstance(src, str) and isinstance(dest, str): 53 _CopyFileW(src, dest, False) 54 elif isinstance(src, str) and isinstance(dest, str): 55 _CopyFileA(src, dest, False) 56 else: 57 raise TypeError("mismatched path types!") 58 59 60 # Helper function; ensures we always open files with the correct encoding when 61 # opening them in text mode. 62 def _open(path, mode="r"): 63 if "b" not in mode: 64 return open(path, mode, encoding="utf-8") 65 return open(path, mode) 66 67 68 class Dest: 69 """ 70 Helper interface for BaseFile.copy. The interface works as follows: 71 - read() and write() can be used to sequentially read/write from the underlying file. 72 - a call to read() after a write() will re-open the underlying file and read from it. 73 - a call to write() after a read() will re-open the underlying file, emptying it, and write to it. 74 """ 75 76 def __init__(self, path): 77 self.file = None 78 self.mode = None 79 self.path = ensure_unicode(path) 80 81 @property 82 def name(self): 83 return self.path 84 85 def read(self, length=-1): 86 if self.mode != "r": 87 self.file = _open(self.path, mode="rb") 88 self.mode = "r" 89 return self.file.read(length) 90 91 def write(self, data): 92 if self.mode != "w": 93 self.file = _open(self.path, mode="wb") 94 self.mode = "w" 95 if isinstance(data, str): 96 data = data.encode() 97 return self.file.write(data) 98 99 def exists(self): 100 return os.path.exists(self.path) 101 102 def close(self): 103 if self.mode: 104 self.mode = None 105 self.file.close() 106 self.file = None 107 108 109 class BaseFile: 110 """ 111 Base interface and helper for file copying. Derived class may implement 112 their own copy function, or rely on BaseFile.copy using the open() member 113 function and/or the path property. 114 """ 115 116 @staticmethod 117 def is_older(first, second): 118 """ 119 Compares the modification time of two files, and returns whether the 120 ``first`` file is older than the ``second`` file. 121 """ 122 # os.path.getmtime returns a result in seconds with precision up to 123 # the microsecond. But microsecond is too precise because 124 # shutil.copystat only copies milliseconds, and seconds is not 125 # enough precision. 126 return int(os.path.getmtime(first) * 1000) <= int( 127 os.path.getmtime(second) * 1000 128 ) 129 130 @staticmethod 131 def any_newer(dest, inputs): 132 """ 133 Compares the modification time of ``dest`` to multiple input files, and 134 returns whether any of the ``inputs`` is newer (has a later mtime) than 135 ``dest``. 136 """ 137 # os.path.getmtime returns a result in seconds with precision up to 138 # the microsecond. But microsecond is too precise because 139 # shutil.copystat only copies milliseconds, and seconds is not 140 # enough precision. 141 dest_mtime = int(os.path.getmtime(dest) * 1000) 142 for input in inputs: 143 try: 144 src_mtime = int(os.path.getmtime(input) * 1000) 145 except OSError as e: 146 if e.errno == errno.ENOENT: 147 # If an input file was removed, we should update. 148 return True 149 raise 150 if dest_mtime < src_mtime: 151 return True 152 return False 153 154 @staticmethod 155 def normalize_mode(mode): 156 # Normalize file mode: 157 # - keep file type (e.g. S_IFREG) 158 ret = stat.S_IFMT(mode) 159 # - expand user read and execute permissions to everyone 160 if mode & 0o0400: 161 ret |= 0o0444 162 if mode & 0o0100: 163 ret |= 0o0111 164 # - keep user write permissions 165 if mode & 0o0200: 166 ret |= 0o0200 167 # - leave away sticky bit, setuid, setgid 168 return ret 169 170 def copy(self, dest, skip_if_older=True): 171 """ 172 Copy the BaseFile content to the destination given as a string or a 173 Dest instance. Avoids replacing existing files if the BaseFile content 174 matches that of the destination, or in case of plain files, if the 175 destination is newer than the original file. This latter behaviour is 176 disabled when skip_if_older is False. 177 Returns whether a copy was actually performed (True) or not (False). 178 """ 179 if isinstance(dest, str): 180 dest = Dest(dest) 181 else: 182 assert isinstance(dest, Dest) 183 184 can_skip_content_check = False 185 if not dest.exists(): 186 can_skip_content_check = True 187 elif getattr(self, "path", None) and getattr(dest, "path", None): 188 if skip_if_older and BaseFile.is_older(self.path, dest.path): 189 return False 190 elif os.path.getsize(self.path) != os.path.getsize(dest.path): 191 can_skip_content_check = True 192 193 if can_skip_content_check: 194 if getattr(self, "path", None) and getattr(dest, "path", None): 195 # The destination directory must exist, or CopyFile will fail. 196 destdir = os.path.dirname(dest.path) 197 os.makedirs(destdir, exist_ok=True) 198 _copyfile(self.path, dest.path) 199 shutil.copystat(self.path, dest.path) 200 else: 201 # Ensure the file is always created 202 if not dest.exists(): 203 dest.write(b"") 204 shutil.copyfileobj(self.open(), dest) 205 return True 206 207 src = self.open() 208 accumulated_src_content = [] 209 while True: 210 dest_content = dest.read(32768) 211 src_content = src.read(32768) 212 accumulated_src_content.append(src_content) 213 if len(dest_content) == len(src_content) == 0: 214 break 215 # If the read content differs between origin and destination, 216 # write what was read up to now, and copy the remainder. 217 if dest_content != src_content: 218 dest.write(b"".join(accumulated_src_content)) 219 shutil.copyfileobj(src, dest) 220 break 221 if hasattr(self, "path") and hasattr(dest, "path"): 222 shutil.copystat(self.path, dest.path) 223 return True 224 225 def open(self): 226 """ 227 Return a file-like object allowing to read() the content of the 228 associated file. This is meant to be overloaded in subclasses to return 229 a custom file-like object. 230 """ 231 assert self.path is not None 232 return open(self.path, "rb") 233 234 def read(self): 235 raise NotImplementedError("BaseFile.read() not implemented. Bug 1170329.") 236 237 def size(self): 238 """Returns size of the entry. 239 240 Derived classes are highly encouraged to override this with a more 241 optimal implementation. 242 """ 243 return len(self.read()) 244 245 @property 246 def mode(self): 247 """ 248 Return the file's unix mode, or None if it has no meaning. 249 """ 250 return None 251 252 def inputs(self): 253 """ 254 Return an iterable of the input file paths that impact this output file. 255 """ 256 raise NotImplementedError("BaseFile.inputs() not implemented.") 257 258 259 class File(BaseFile): 260 """ 261 File class for plain files. 262 """ 263 264 def __init__(self, path): 265 self.path = ensure_unicode(path) 266 267 @property 268 def mode(self): 269 """ 270 Return the file's unix mode, as returned by os.stat().st_mode. 271 """ 272 if platform.system() == "Windows": 273 return None 274 assert self.path is not None 275 mode = os.stat(self.path).st_mode 276 return self.normalize_mode(mode) 277 278 def read(self): 279 """Return the contents of the file.""" 280 with open(self.path, "rb") as fh: 281 return fh.read() 282 283 def size(self): 284 return os.stat(self.path).st_size 285 286 def inputs(self): 287 return (self.path,) 288 289 290 class ExecutableFile(File): 291 """ 292 File class for executable and library files on OS/2, OS/X and ELF systems. 293 (see mozpack.executables.is_executable documentation). 294 """ 295 296 def __init__(self, path): 297 File.__init__(self, path) 298 299 def copy(self, dest, skip_if_older=True): 300 real_dest = dest 301 if not isinstance(dest, str): 302 fd, dest = mkstemp() 303 os.close(fd) 304 os.remove(dest) 305 assert isinstance(dest, str) 306 # If File.copy didn't actually copy because dest is newer, check the 307 # file sizes. If dest is smaller, it means it is already stripped and 308 # elfhacked, so we can skip. 309 if not File.copy(self, dest, skip_if_older) and os.path.getsize( 310 self.path 311 ) > os.path.getsize(dest): 312 return False 313 try: 314 if may_strip(dest): 315 strip(dest) 316 if may_elfhack(dest): 317 elfhack(dest) 318 except ErrorMessage: 319 os.remove(dest) 320 raise 321 322 if real_dest != dest: 323 f = File(dest) 324 ret = f.copy(real_dest, skip_if_older) 325 os.remove(dest) 326 return ret 327 return True 328 329 330 class AbsoluteSymlinkFile(File): 331 """File class that is copied by symlinking (if available). 332 333 This class only works if the target path is absolute. 334 """ 335 336 def __init__(self, path): 337 if not os.path.isabs(path): 338 raise ValueError("Symlink target not absolute: %s" % path) 339 340 File.__init__(self, path) 341 342 def copy(self, dest, skip_if_older=True): 343 assert isinstance(dest, str) 344 345 # The logic in this function is complicated by the fact that symlinks 346 # aren't universally supported. So, where symlinks aren't supported, we 347 # fall back to file copying. Keep in mind that symlink support is 348 # per-filesystem, not per-OS. 349 350 # Handle the simple case where symlinks are definitely not supported by 351 # falling back to file copy. 352 if not hasattr(os, "symlink"): 353 return File.copy(self, dest, skip_if_older=skip_if_older) 354 355 # Always verify the symlink target path exists. 356 if not os.path.exists(self.path): 357 errors.fatal("Symlink target path does not exist: %s" % self.path) 358 359 st = None 360 361 try: 362 st = os.lstat(dest) 363 except OSError as ose: 364 if ose.errno != errno.ENOENT: 365 raise 366 367 # If the dest is a symlink pointing to us, we have nothing to do. 368 # If it's the wrong symlink, the filesystem must support symlinks, 369 # so we replace with a proper symlink. 370 if st and stat.S_ISLNK(st.st_mode): 371 link = os.readlink(dest) 372 if link == self.path: 373 return False 374 375 os.remove(dest) 376 os.symlink(self.path, dest) 377 return True 378 379 # If the destination doesn't exist, we try to create a symlink. If that 380 # fails, we fall back to copy code. 381 if not st: 382 try: 383 os.symlink(self.path, dest) 384 return True 385 except OSError: 386 return File.copy(self, dest, skip_if_older=skip_if_older) 387 388 # Now the complicated part. If the destination exists, we could be 389 # replacing a file with a symlink. Or, the filesystem may not support 390 # symlinks. We want to minimize I/O overhead for performance reasons, 391 # so we keep the existing destination file around as long as possible. 392 # A lot of the system calls would be eliminated if we cached whether 393 # symlinks are supported. However, even if we performed a single 394 # up-front test of whether the root of the destination directory 395 # supports symlinks, there's no guarantee that all operations for that 396 # dest (or source) would be on the same filesystem and would support 397 # symlinks. 398 # 399 # Our strategy is to attempt to create a new symlink with a random 400 # name. If that fails, we fall back to copy mode. If that works, we 401 # remove the old destination and move the newly-created symlink into 402 # its place. 403 404 temp_dest = os.path.join(os.path.dirname(dest), str(uuid.uuid4())) 405 try: 406 os.symlink(self.path, temp_dest) 407 # TODO Figure out exactly how symlink creation fails and only trap 408 # that. 409 except OSError: 410 return File.copy(self, dest, skip_if_older=skip_if_older) 411 412 # If removing the original file fails, don't forget to clean up the 413 # temporary symlink. 414 try: 415 os.remove(dest) 416 except OSError: 417 os.remove(temp_dest) 418 raise 419 420 os.rename(temp_dest, dest) 421 return True 422 423 424 class HardlinkFile(File): 425 """File class that is copied by hard linking (if available) 426 427 This is similar to the AbsoluteSymlinkFile, but with hard links. The symlink 428 implementation requires paths to be absolute, because they are resolved at 429 read time, which makes relative paths messy. Hard links resolve paths at 430 link-creation time, so relative paths are fine. 431 """ 432 433 def copy(self, dest, skip_if_older=True): 434 assert isinstance(dest, str) 435 436 if not hasattr(os, "link"): 437 return super().copy(dest, skip_if_older=skip_if_older) 438 439 try: 440 path_st = os.stat(self.path) 441 except OSError as e: 442 if e.errno == errno.ENOENT: 443 errors.fatal("Hard link target path does not exist: %s" % self.path) 444 else: 445 raise 446 447 st = None 448 try: 449 st = os.lstat(dest) 450 except OSError as e: 451 if e.errno != errno.ENOENT: 452 raise 453 454 if st: 455 # The dest already points to the right place. 456 if st.st_dev == path_st.st_dev and st.st_ino == path_st.st_ino: 457 return False 458 # The dest exists and it points to the wrong place 459 os.remove(dest) 460 461 # At this point, either the dest used to exist and we just deleted it, 462 # or it never existed. We can now safely create the hard link. 463 try: 464 os.link(self.path, dest) 465 except OSError: 466 # If we can't hard link, fall back to copying 467 return super().copy(dest, skip_if_older=skip_if_older) 468 return True 469 470 471 class ExistingFile(BaseFile): 472 """ 473 File class that represents a file that may exist but whose content comes 474 from elsewhere. 475 476 This purpose of this class is to account for files that are installed via 477 external means. It is typically only used in manifests or in registries to 478 account for files. 479 480 When asked to copy, this class does nothing because nothing is known about 481 the source file/data. 482 483 Instances of this class come in two flavors: required and optional. If an 484 existing file is required, it must exist during copy() or an error is 485 raised. 486 """ 487 488 def __init__(self, required): 489 self.required = required 490 491 def copy(self, dest, skip_if_older=True): 492 if isinstance(dest, str): 493 dest = Dest(dest) 494 else: 495 assert isinstance(dest, Dest) 496 497 if not self.required: 498 return 499 500 if not dest.exists(): 501 errors.fatal("Required existing file doesn't exist: %s" % dest.path) 502 503 def inputs(self): 504 return () 505 506 507 class PreprocessedFile(BaseFile): 508 """ 509 File class for a file that is preprocessed. PreprocessedFile.copy() runs 510 the preprocessor on the file to create the output. 511 """ 512 513 def __init__( 514 self, 515 path, 516 depfile_path, 517 marker, 518 defines, 519 extra_depends=None, 520 silence_missing_directive_warnings=False, 521 ): 522 self.path = ensure_unicode(path) 523 self.depfile = ensure_unicode(depfile_path) 524 self.marker = marker 525 self.defines = defines 526 self.extra_depends = list(extra_depends or []) 527 self.silence_missing_directive_warnings = silence_missing_directive_warnings 528 529 def inputs(self): 530 pp = Preprocessor(defines=self.defines, marker=self.marker) 531 pp.setSilenceDirectiveWarnings(self.silence_missing_directive_warnings) 532 533 with _open(self.path, "r") as input: 534 with _open(os.devnull, "w") as output: 535 pp.processFile(input=input, output=output) 536 537 # This always yields at least self.path. 538 return pp.includes 539 540 def copy(self, dest, skip_if_older=True): 541 """ 542 Invokes the preprocessor to create the destination file. 543 """ 544 if isinstance(dest, str): 545 dest = Dest(dest) 546 else: 547 assert isinstance(dest, Dest) 548 549 # We have to account for the case where the destination exists and is a 550 # symlink to something. Since we know the preprocessor is certainly not 551 # going to create a symlink, we can just remove the existing one. If the 552 # destination is not a symlink, we leave it alone, since we're going to 553 # overwrite its contents anyway. 554 # If symlinks aren't supported at all, we can skip this step. 555 if hasattr(os, "symlink") and os.path.islink(dest.path): 556 os.remove(dest.path) 557 558 pp_deps = set(self.extra_depends) 559 560 # If a dependency file was specified, and it exists, add any 561 # dependencies from that file to our list. 562 if self.depfile and os.path.exists(self.depfile): 563 target = mozpath.normpath(dest.name) 564 with _open(self.depfile, "rt") as fileobj: 565 for rule in makeutil.read_dep_makefile(fileobj): 566 if target in rule.targets(): 567 pp_deps.update(rule.dependencies()) 568 569 skip = False 570 if dest.exists() and skip_if_older: 571 # If a dependency file was specified, and it doesn't exist, 572 # assume that the preprocessor needs to be rerun. That will 573 # regenerate the dependency file. 574 if self.depfile and not os.path.exists(self.depfile): 575 skip = False 576 else: 577 skip = not BaseFile.any_newer(dest.path, pp_deps) 578 579 if skip: 580 return False 581 582 deps_out = None 583 if self.depfile: 584 deps_out = FileAvoidWrite(self.depfile) 585 pp = Preprocessor(defines=self.defines, marker=self.marker) 586 pp.setSilenceDirectiveWarnings(self.silence_missing_directive_warnings) 587 588 with _open(self.path, "r") as input: 589 pp.processFile(input=input, output=dest, depfile=deps_out) 590 591 dest.close() 592 if self.depfile: 593 deps_out.close() 594 595 return True 596 597 598 class GeneratedFile(BaseFile): 599 """ 600 File class for content with no previous existence on the filesystem. 601 """ 602 603 def __init__(self, content): 604 self._content = content 605 606 @property 607 def content(self): 608 if inspect.isfunction(self._content): 609 self._content = self._content() 610 if isinstance(self._content, str): 611 return self._content.encode() 612 return self._content 613 614 @content.setter 615 def content(self, content): 616 self._content = content 617 618 def open(self): 619 return BytesIO(self.content) 620 621 def read(self): 622 return self.content 623 624 def size(self): 625 return len(self.content) 626 627 def inputs(self): 628 return () 629 630 631 class DeflatedFile(BaseFile): 632 """ 633 File class for members of a jar archive. DeflatedFile.copy() effectively 634 extracts the file from the jar archive. 635 """ 636 637 def __init__(self, file): 638 from mozpack.mozjar import JarFileReader 639 640 assert isinstance(file, JarFileReader) 641 self.file = file 642 643 def open(self): 644 self.file.seek(0) 645 return self.file 646 647 648 class ExtractedTarFile(GeneratedFile): 649 """ 650 File class for members of a tar archive. Contents of the underlying file 651 are extracted immediately and stored in memory. 652 """ 653 654 def __init__(self, tar, info): 655 assert isinstance(info, TarInfo) 656 assert isinstance(tar, TarFile) 657 GeneratedFile.__init__(self, tar.extractfile(info).read()) 658 self._unix_mode = self.normalize_mode(info.mode) 659 660 @property 661 def mode(self): 662 return self._unix_mode 663 664 def read(self): 665 return self.content 666 667 668 class ManifestFile(BaseFile): 669 """ 670 File class for a manifest file. It takes individual manifest entries (using 671 the add() and remove() member functions), and adjusts them to be relative 672 to the base path for the manifest, given at creation. 673 Example: 674 There is a manifest entry "content foobar foobar/content/" relative 675 to "foobar/chrome". When packaging, the entry will be stored in 676 jar:foobar/omni.ja!/chrome/chrome.manifest, which means the entry 677 will have to be relative to "chrome" instead of "foobar/chrome". This 678 doesn't really matter when serializing the entry, since this base path 679 is not written out, but it matters when moving the entry at the same 680 time, e.g. to jar:foobar/omni.ja!/chrome.manifest, which we don't do 681 currently but could in the future. 682 """ 683 684 def __init__(self, base, entries=None): 685 self._base = base 686 self._entries = [] 687 self._interfaces = [] 688 for e in entries or []: 689 self.add(e) 690 691 def add(self, entry): 692 """ 693 Add the given entry to the manifest. Entries are rebased at open() time 694 instead of add() time so that they can be more easily remove()d. 695 """ 696 assert isinstance(entry, ManifestEntry) 697 if isinstance(entry, ManifestInterfaces): 698 self._interfaces.append(entry) 699 else: 700 self._entries.append(entry) 701 702 def remove(self, entry): 703 """ 704 Remove the given entry from the manifest. 705 """ 706 assert isinstance(entry, ManifestEntry) 707 if isinstance(entry, ManifestInterfaces): 708 self._interfaces.remove(entry) 709 else: 710 self._entries.remove(entry) 711 712 def open(self): 713 """ 714 Return a file-like object allowing to read() the serialized content of 715 the manifest. 716 """ 717 content = "".join( 718 "%s\n" % e.rebase(self._base) 719 for e in chain(self._entries, self._interfaces) 720 ) 721 return BytesIO(content.encode()) 722 723 def __iter__(self): 724 """ 725 Iterate over entries in the manifest file. 726 """ 727 return chain(self._entries, self._interfaces) 728 729 def isempty(self): 730 """ 731 Return whether there are manifest entries to write 732 """ 733 return len(self._entries) + len(self._interfaces) == 0 734 735 736 class MinifiedCommentStripped(BaseFile): 737 """ 738 File class for content minified by stripping comments. This wraps around a 739 BaseFile instance, and removes lines starting with a # from its content. 740 """ 741 742 def __init__(self, file): 743 assert isinstance(file, BaseFile) 744 self._file = file 745 746 def open(self): 747 """ 748 Return a file-like object allowing to read() the minified content of 749 the underlying file. 750 """ 751 content = "".join( 752 l 753 for l in [s.decode() for s in self._file.open().readlines()] 754 if not l.startswith("#") 755 ) 756 return BytesIO(content.encode()) 757 758 759 class MinifiedJavaScript(BaseFile): 760 """ 761 Minify JavaScript files using Terser while preserving 762 class and function names for better debugging. 763 """ 764 765 TERSER_CONFIG = { 766 "parse": { 767 "ecma": 2020, 768 "module": True, 769 }, 770 "compress": { 771 "unused": True, 772 "passes": 3, 773 "ecma": 2020, 774 }, 775 "mangle": { 776 "keep_classnames": True, # Preserve class names 777 "keep_fnames": True, # Preserve function names 778 }, 779 "format": { 780 "comments": "/@lic|webpackIgnore|@vite-ignore/i", 781 "ascii_only": True, 782 "ecma": 2020, 783 }, 784 "sourceMap": False, 785 } 786 787 def __init__(self, file, filepath): 788 """ 789 Initialize with a BaseFile instance to minify. 790 """ 791 self._file = file 792 self._filepath = filepath 793 794 def _minify_with_terser(self, source_content): 795 """ 796 Minify JavaScript content using Terser 797 """ 798 if len(source_content) == 0: 799 return source_content 800 801 import buildconfig 802 803 node_path = buildconfig.substs.get("NODEJS") 804 if not node_path: 805 errors.fatal("NODEJS not found in build configuration") 806 807 topsrcdir = Path(buildconfig.topsrcdir) 808 809 if os.environ.get("MOZ_AUTOMATION"): 810 fetches_terser = ( 811 Path(os.environ["MOZ_FETCHES_DIR"]) 812 / "terser" 813 / "node_modules" 814 / "terser" 815 / "bin" 816 / "terser" 817 ) 818 if fetches_terser.exists(): 819 terser_path = fetches_terser 820 else: 821 errors.fatal(f"Terser toolchain not found at {fetches_terser}.") 822 else: 823 terser_dir = topsrcdir / "tools" / "terser" 824 terser_path = terser_dir / "node_modules" / "terser" / "bin" / "terser" 825 826 if not terser_path.exists(): 827 # Automatically set up node_modules if terser is not found 828 package_setup(str(terser_dir), "terser") 829 830 # Verify that terser is now available after setup 831 if not terser_path.exists(): 832 errors.fatal( 833 f"Terser is required for JavaScript minification but could not be installed at {terser_path}. " 834 "Package setup may have failed." 835 ) 836 837 terser_cmd = [node_path, str(terser_path)] 838 839 with tempfile.TemporaryDirectory() as temp_dir: 840 temp_path = Path(temp_dir) 841 config_path = temp_path / "terser_config.json" 842 source_path = temp_path / "source.js" 843 844 config_path.write_text(json.dumps(self.TERSER_CONFIG), encoding="utf-8") 845 source_path.write_bytes(source_content) 846 847 try: 848 result = subprocess.run( 849 terser_cmd 850 + [ 851 source_path, 852 "--config-file", 853 config_path, 854 ], 855 capture_output=True, 856 check=False, 857 ) 858 859 if result.returncode == 0: 860 return result.stdout 861 else: 862 error_msg = result.stderr.decode("utf-8", errors="ignore") 863 errors.error( 864 f"Terser minification failed for {self._filepath}: {error_msg}" 865 ) 866 return source_content 867 868 except subprocess.SubprocessError as e: 869 errors.error(f"Error running Terser for {self._filepath}: {e}") 870 return source_content 871 872 def open(self): 873 """ 874 Return a file-like object with the minified content. 875 """ 876 source_content = self._file.open().read() 877 minified = self._minify_with_terser(source_content) 878 return BytesIO(minified) 879 880 881 class BaseFinder: 882 def __init__(self, base, minify=False, minify_js=False, minify_pdfjs=False): 883 """ 884 Initializes the instance with a reference base directory. 885 886 The optional minify argument specifies whether minification of code 887 should occur. minify_js is an additional option to control minification 888 of JavaScript. It requires minify to be True. minify_pdfjs controls 889 minification of PDF.js files independently. 890 """ 891 if minify_js and not minify: 892 raise ValueError("minify_js requires minify.") 893 894 self.base = mozpath.normsep(base) 895 self._minify = minify 896 self._minify_js = minify_js 897 self._minify_pdfjs = minify_pdfjs 898 899 def find(self, pattern): 900 """ 901 Yield path, BaseFile_instance pairs for all files under the base 902 directory and its subdirectories that match the given pattern. See the 903 mozpack.path.match documentation for a description of the handled 904 patterns. 905 """ 906 while pattern.startswith("/"): 907 pattern = pattern[1:] 908 for p, f in self._find(pattern): 909 yield p, self._minify_file(p, f) 910 911 def get(self, path): 912 """Obtain a single file. 913 914 Where ``find`` is tailored towards matching multiple files, this method 915 is used for retrieving a single file. Use this method when performance 916 is critical. 917 918 Returns a ``BaseFile`` if at most one file exists or ``None`` otherwise. 919 """ 920 files = list(self.find(path)) 921 if len(files) != 1: 922 return None 923 return files[0][1] 924 925 def __iter__(self): 926 """ 927 Iterates over all files under the base directory (excluding files 928 starting with a '.' and files at any level under a directory starting 929 with a '.'). 930 for path, file in finder: 931 ... 932 """ 933 return self.find("") 934 935 def __contains__(self, pattern): 936 raise RuntimeError( 937 "'in' operator forbidden for %s. Use contains()." % self.__class__.__name__ 938 ) 939 940 def contains(self, pattern): 941 """ 942 Return whether some files under the base directory match the given 943 pattern. See the mozpack.path.match documentation for a description of 944 the handled patterns. 945 """ 946 return any(self.find(pattern)) 947 948 def _minify_file(self, path, file): 949 """ 950 Return an appropriate MinifiedSomething wrapper for the given BaseFile 951 instance (file), according to the file type (determined by the given 952 path), if the FileFinder was created with minification enabled. 953 Otherwise, just return the given BaseFile instance. 954 """ 955 if not self._minify or isinstance(file, ExecutableFile): 956 return file 957 958 if path.endswith((".ftl", ".properties")): 959 return MinifiedCommentStripped(file) 960 961 if path.endswith((".js", ".jsm", ".mjs")): 962 file_path = mozpath.normsep(path) 963 filename = mozpath.basename(file_path) 964 # Don't minify prefs files because they use a custom parser that's stricter than JS 965 if filename.endswith("prefs.js") or "/defaults/pref" in file_path: 966 return file 967 # PDF.js files are minified based on the minify_pdfjs flag (for now) 968 if "pdfjs" in file_path and self._minify_pdfjs: 969 return MinifiedJavaScript(file, path) 970 elif self._minify_js: 971 return MinifiedJavaScript(file, path) 972 973 return file 974 975 def _find_helper(self, pattern, files, file_getter): 976 """Generic implementation of _find. 977 978 A few *Finder implementations share logic for returning results. 979 This function implements the custom logic. 980 981 The ``file_getter`` argument is a callable that receives a path 982 that is known to exist. The callable should return a ``BaseFile`` 983 instance. 984 """ 985 if "*" in pattern: 986 for p in files: 987 if mozpath.match(p, pattern): 988 yield p, file_getter(p) 989 elif pattern == "": 990 for p in files: 991 yield p, file_getter(p) 992 elif pattern in files: 993 yield pattern, file_getter(pattern) 994 else: 995 for p in files: 996 if mozpath.basedir(p, [pattern]) == pattern: 997 yield p, file_getter(p) 998 999 1000 class FileFinder(BaseFinder): 1001 """ 1002 Helper to get appropriate BaseFile instances from the file system. 1003 """ 1004 1005 def __init__( 1006 self, 1007 base, 1008 find_executables=False, 1009 ignore=(), 1010 ignore_broken_symlinks=False, 1011 find_dotfiles=False, 1012 **kargs, 1013 ): 1014 """ 1015 Create a FileFinder for files under the given base directory. 1016 1017 The find_executables argument determines whether the finder needs to 1018 try to guess whether files are executables. Disabling this guessing 1019 when not necessary can speed up the finder significantly. 1020 1021 ``ignore`` accepts an iterable of patterns to ignore. Entries are 1022 strings that match paths relative to ``base`` using 1023 ``mozpath.match()``. This means if an entry corresponds 1024 to a directory, all files under that directory will be ignored. If 1025 an entry corresponds to a file, that particular file will be ignored. 1026 ``ignore_broken_symlinks`` is passed by the packager to work around an 1027 issue with the build system not cleaning up stale files in some common 1028 cases. See bug 1297381. 1029 """ 1030 BaseFinder.__init__(self, base, **kargs) 1031 self.find_dotfiles = find_dotfiles 1032 self.find_executables = find_executables 1033 self.ignore = tuple(mozpath.normsep(path) for path in ignore) 1034 self.ignore_broken_symlinks = ignore_broken_symlinks 1035 1036 def _find(self, pattern): 1037 """ 1038 Actual implementation of FileFinder.find(), dispatching to specialized 1039 member functions depending on what kind of pattern was given. 1040 Note all files with a name starting with a '.' are ignored when 1041 scanning directories, but are not ignored when explicitely requested. 1042 """ 1043 if "*" in pattern: 1044 return self._find_glob("", mozpath.split(pattern)) 1045 elif os.path.isdir(os.path.join(self.base, pattern)): 1046 return self._find_dir(pattern) 1047 else: 1048 f = self.get(pattern) 1049 return ((pattern, f),) if f else () 1050 1051 def _find_dir(self, path): 1052 """ 1053 Actual implementation of FileFinder.find() when the given pattern 1054 corresponds to an existing directory under the base directory. 1055 Ignores file names starting with a '.' under the given path. If the 1056 path itself has leafs starting with a '.', they are not ignored. 1057 """ 1058 for p in self.ignore: 1059 if mozpath.match(path, p): 1060 return 1061 1062 # The sorted makes the output idempotent. Otherwise, we are 1063 # likely dependent on filesystem implementation details, such as 1064 # inode ordering. 1065 for p in sorted(os.listdir(os.path.join(self.base, path))): 1066 if p.startswith("."): 1067 if p in (".", ".."): 1068 continue 1069 if not self.find_dotfiles: 1070 continue 1071 yield from self._find(mozpath.join(path, p)) 1072 1073 def get(self, path): 1074 srcpath = os.path.join(self.base, path) 1075 if not os.path.lexists(srcpath): 1076 return None 1077 1078 if self.ignore_broken_symlinks and not os.path.exists(srcpath): 1079 return None 1080 1081 for p in self.ignore: 1082 if mozpath.match(path, p): 1083 return None 1084 1085 if self.find_executables and is_executable(srcpath): 1086 return ExecutableFile(srcpath) 1087 else: 1088 return File(srcpath) 1089 1090 def _find_glob(self, base, pattern): 1091 """ 1092 Actual implementation of FileFinder.find() when the given pattern 1093 contains globbing patterns ('*' or '**'). This is meant to be an 1094 equivalent of: 1095 for p, f in self: 1096 if mozpath.match(p, pattern): 1097 yield p, f 1098 but avoids scanning the entire tree. 1099 """ 1100 if not pattern: 1101 for p, f in self._find(base): 1102 yield p, f 1103 elif pattern[0] == "**": 1104 for p, f in self._find(base): 1105 if mozpath.match(p, mozpath.join(*pattern)): 1106 yield p, f 1107 elif "*" in pattern[0]: 1108 if not os.path.exists(os.path.join(self.base, base)): 1109 return 1110 1111 for p in self.ignore: 1112 if mozpath.match(base, p): 1113 return 1114 1115 # See above comment w.r.t. sorted() and idempotent behavior. 1116 for p in sorted(os.listdir(os.path.join(self.base, base))): 1117 if p.startswith(".") and not pattern[0].startswith("."): 1118 continue 1119 if mozpath.match(p, pattern[0]): 1120 for p_, f in self._find_glob(mozpath.join(base, p), pattern[1:]): 1121 yield p_, f 1122 else: 1123 for p, f in self._find_glob(mozpath.join(base, pattern[0]), pattern[1:]): 1124 yield p, f 1125 1126 1127 class JarFinder(BaseFinder): 1128 """ 1129 Helper to get appropriate DeflatedFile instances from a JarReader. 1130 """ 1131 1132 def __init__(self, base, reader, **kargs): 1133 """ 1134 Create a JarFinder for files in the given JarReader. The base argument 1135 is used as an indication of the Jar file location. 1136 """ 1137 assert isinstance(reader, JarReader) 1138 BaseFinder.__init__(self, base, **kargs) 1139 self._files = OrderedDict((f.filename, f) for f in reader) 1140 1141 def _find(self, pattern): 1142 """ 1143 Actual implementation of JarFinder.find(), dispatching to specialized 1144 member functions depending on what kind of pattern was given. 1145 """ 1146 return self._find_helper( 1147 pattern, self._files, lambda x: DeflatedFile(self._files[x]) 1148 ) 1149 1150 1151 class TarFinder(BaseFinder): 1152 """ 1153 Helper to get files from a TarFile. 1154 """ 1155 1156 def __init__(self, base, tar, **kargs): 1157 """ 1158 Create a TarFinder for files in the given TarFile. The base argument 1159 is used as an indication of the Tar file location. 1160 """ 1161 assert isinstance(tar, TarFile) 1162 self._tar = tar 1163 BaseFinder.__init__(self, base, **kargs) 1164 self._files = OrderedDict((f.name, f) for f in tar if f.isfile()) 1165 1166 def _find(self, pattern): 1167 """ 1168 Actual implementation of TarFinder.find(), dispatching to specialized 1169 member functions depending on what kind of pattern was given. 1170 """ 1171 return self._find_helper( 1172 pattern, self._files, lambda x: ExtractedTarFile(self._tar, self._files[x]) 1173 ) 1174 1175 1176 class ComposedFinder(BaseFinder): 1177 """ 1178 Composes multiple File Finders in some sort of virtual file system. 1179 1180 A ComposedFinder is initialized from a dictionary associating paths 1181 to `*Finder instances.` 1182 1183 Note this could be optimized to be smarter than getting all the files 1184 in advance. 1185 """ 1186 1187 def __init__(self, finders): 1188 # Can't import globally, because of the dependency of mozpack.copier 1189 # on this module. 1190 from mozpack.copier import FileRegistry 1191 1192 self.files = FileRegistry() 1193 1194 for base, finder in sorted(finders.items()): 1195 if self.files.contains(base): 1196 self.files.remove(base) 1197 for p, f in finder.find(""): 1198 self.files.add(mozpath.join(base, p), f) 1199 1200 def find(self, pattern): 1201 for p in self.files.match(pattern): 1202 yield p, self.files[p] 1203 1204 1205 class MercurialFile(BaseFile): 1206 """File class for holding data from Mercurial.""" 1207 1208 def __init__(self, client, rev, path): 1209 self._content = client.cat([path.encode()], rev=rev.encode()) 1210 1211 def open(self): 1212 return BytesIO(self._content) 1213 1214 def read(self): 1215 return self._content 1216 1217 1218 class MercurialRevisionFinder(BaseFinder): 1219 """A finder that operates on a specific Mercurial revision.""" 1220 1221 def __init__(self, repo, rev=".", recognize_repo_paths=False, **kwargs): 1222 """Create a finder attached to a specific revision in a repository. 1223 1224 If no revision is given, open the parent of the working directory. 1225 1226 ``recognize_repo_paths`` will enable a mode where ``.get()`` will 1227 recognize full paths that include the repo's path. Typically Finder 1228 instances are "bound" to a base directory and paths are relative to 1229 that directory. This mode changes that. When this mode is activated, 1230 ``.find()`` will not work! This mode exists to support the moz.build 1231 reader, which uses absolute paths instead of relative paths. The reader 1232 should eventually be rewritten to use relative paths and this hack 1233 should be removed (TODO bug 1171069). 1234 """ 1235 if not hglib: 1236 raise Exception("hglib package not found") 1237 1238 super().__init__(base=repo, **kwargs) 1239 1240 self._root = mozpath.normpath(repo).rstrip("/") 1241 self._recognize_repo_paths = recognize_repo_paths 1242 1243 # We change directories here otherwise we have to deal with relative 1244 # paths. 1245 oldcwd = os.getcwd() 1246 os.chdir(self._root) 1247 try: 1248 self._client = hglib.open(path=repo, encoding=b"utf-8") 1249 finally: 1250 os.chdir(oldcwd) 1251 self._rev = rev if rev is not None else "." 1252 self._files = OrderedDict() 1253 1254 # Immediately populate the list of files in the repo since nearly every 1255 # operation requires this list. 1256 out = self._client.rawcommand([ 1257 b"files", 1258 b"--rev", 1259 self._rev.encode(), 1260 ]) 1261 for relpath in out.splitlines(): 1262 # Mercurial may use \ as path separator on Windows. So use 1263 # normpath(). 1264 self._files[mozpath.normpath(relpath).decode()] = None 1265 1266 def _find(self, pattern): 1267 if self._recognize_repo_paths: 1268 raise NotImplementedError("cannot use find with recognize_repo_path") 1269 1270 return self._find_helper(pattern, self._files, self._get) 1271 1272 def get(self, path): 1273 path = mozpath.normpath(path) 1274 if self._recognize_repo_paths: 1275 if not path.startswith(self._root): 1276 raise ValueError( 1277 "lookups in recognize_repo_paths mode must be " 1278 "prefixed with repo path: %s" % path 1279 ) 1280 path = path[len(self._root) + 1 :] 1281 1282 try: 1283 return self._get(path) 1284 except KeyError: 1285 return None 1286 1287 def _get(self, path): 1288 # We lazy populate self._files because potentially creating tens of 1289 # thousands of MercurialFile instances for every file in the repo is 1290 # inefficient. 1291 f = self._files[path] 1292 if not f: 1293 f = MercurialFile(self._client, self._rev, path) 1294 self._files[path] = f 1295 1296 return f 1297 1298 1299 class FileListFinder(BaseFinder): 1300 """Finder for a literal list of file names.""" 1301 1302 def __init__(self, files): 1303 self._files = sorted(files) 1304 1305 @memoize 1306 def _match(self, pattern): 1307 """Return a sorted list of all files matching the given pattern.""" 1308 # We don't use the utility _find_helper method because it's not tuned 1309 # for performance in the way that we would like this class to be. That's 1310 # a possible avenue for refactoring here. 1311 ret = [] 1312 # We do this as an optimization to figure out where in the sorted list 1313 # to search and where to stop searching. 1314 components = pattern.split("/") 1315 prefix = "/".join(takewhile(lambda s: "*" not in s, components)) 1316 start = bisect.bisect_left(self._files, prefix) 1317 for i in range(start, len(self._files)): 1318 f = self._files[i] 1319 if not f.startswith(prefix): 1320 break 1321 # Skip hidden files while scanning. 1322 if "/." in f[len(prefix) :]: 1323 continue 1324 if mozpath.match(f, pattern): 1325 ret.append(f) 1326 return ret 1327 1328 def find(self, pattern): 1329 pattern = pattern.strip("/") 1330 for path in self._match(pattern): 1331 yield path, File(path)