copier.py (22354B)
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import errno
import os
import stat
import sys
from collections import Counter, OrderedDict, defaultdict
from concurrent import futures

import mozpack.path as mozpath
from mozpack.errors import errors
from mozpack.files import BaseFile, Dest


class FileRegistry:
    """
    Generic container to keep track of a set of BaseFile instances. It
    preserves the order under which the files are added, but doesn't keep
    track of empty directories (directories are not stored at all).
    The paths associated with the BaseFile instances are relative to an
    unspecified (virtual) root directory.

        registry = FileRegistry()
        registry.add('foo/bar', file_instance)
    """

    def __init__(self):
        # path -> BaseFile, in insertion order.
        self._files = OrderedDict()
        # directory -> number of registered files living under it.
        self._required_directories = Counter()
        # directory -> list of that directory and all of its ancestors.
        self._partial_paths_cache = {}

    def _partial_paths(self, path):
        """
        Turn "foo/bar/baz/zot" into ["foo/bar/baz", "foo/bar", "foo"].

        The returned list is shared with the internal cache and must not be
        mutated by callers.
        """
        dir_name = path.rpartition("/")[0]
        if not dir_name:
            return []

        partial_paths = self._partial_paths_cache.get(dir_name)
        if partial_paths is not None:
            return partial_paths

        partial_paths = [dir_name] + self._partial_paths(dir_name)

        self._partial_paths_cache[dir_name] = partial_paths
        return partial_paths

    def add(self, path, content):
        """
        Add a BaseFile instance to the container, under the given path.

        An error is reported through mozpack.errors (and nothing is added)
        when the path is already registered, when it is a directory required
        by an already registered file, or when one of its parent directories
        is registered as a file.
        """
        assert isinstance(content, BaseFile)
        if path in self._files:
            return errors.error("%s already added" % path)
        if self._required_directories[path] > 0:
            return errors.error("Can't add %s: it is a required directory" % path)
        # Check whether any parent of the given path is already stored
        partial_paths = self._partial_paths(path)
        for partial_path in partial_paths:
            if partial_path in self._files:
                return errors.error("Can't add %s: %s is a file" % (path, partial_path))
        self._files[path] = content
        self._required_directories.update(partial_paths)

    def match(self, pattern):
        """
        Return the list of paths, stored in the container, matching the
        given pattern. See the mozpack.path.match documentation for a
        description of the handled patterns.
        """
        if "*" in pattern:
            return [p for p in self.paths() if mozpath.match(p, pattern)]
        if pattern == "":
            return self.paths()
        if pattern in self._files:
            return [pattern]
        return [p for p in self.paths() if mozpath.basedir(p, [pattern]) == pattern]

    def remove(self, pattern):
        """
        Remove paths matching the given pattern from the container. See the
        mozpack.path.match documentation for a description of the handled
        patterns.

        An error is reported through mozpack.errors when nothing matches.
        """
        items = self.match(pattern)
        if not items:
            return errors.error(
                "Can't remove %s: %s"
                % (pattern, "not matching anything previously added")
            )
        for i in items:
            del self._files[i]
            self._required_directories.subtract(self._partial_paths(i))

    def paths(self):
        """
        Return all paths stored in the container, in the order they were added.
        """
        return list(self._files)

    def __len__(self):
        """
        Return number of paths stored in the container.
        """
        return len(self._files)

    def __contains__(self, pattern):
        # Deliberately unsupported: callers must use contains().
        raise RuntimeError(
            "'in' operator forbidden for %s. Use contains()."
            % self.__class__.__name__
        )

    def contains(self, pattern):
        """
        Return whether the container contains paths matching the given
        pattern. See the mozpack.path.match documentation for a description of
        the handled patterns.
        """
        return len(self.match(pattern)) > 0

    def __getitem__(self, path):
        """
        Return the BaseFile instance stored in the container for the given
        path.
        """
        return self._files[path]

    def __iter__(self):
        """
        Iterate over all (path, BaseFile instance) pairs from the container.
            for path, file in registry:
                (...)
        """
        return iter(self._files.items())

    def required_directories(self):
        """
        Return the set of directories required by the paths in the container,
        in no particular order. The returned directories are relative to an
        unspecified (virtual) root directory (and do not include said root
        directory).
        """
        # Counter.subtract() leaves zero-count entries behind after removals,
        # so only directories still in use are reported.
        return {k for k, v in self._required_directories.items() if v > 0}

    def output_to_inputs_tree(self):
        """
        Return a dictionary mapping each output path to the set of its
        required input paths.

        All paths are normalized.
        """
        tree = {}
        for output, file in self:
            output = mozpath.normpath(output)
            tree[output] = {mozpath.normpath(f) for f in file.inputs()}
        return tree

    def input_to_outputs_tree(self):
        """
        Return a dictionary mapping each input path to the set of
        impacted output paths.

        All paths are normalized.
        """
        tree = defaultdict(set)
        for output, file in self:
            output = mozpath.normpath(output)
            for input_path in file.inputs():
                tree[mozpath.normpath(input_path)].add(output)
        return dict(tree)


class FileRegistrySubtree:
    """A proxy class to give access to a subtree of an existing FileRegistry.

    Note this doesn't implement the whole FileRegistry interface."""

    def __new__(cls, base, registry):
        # With an empty base there is nothing to proxy: hand back the
        # registry itself.
        if not base:
            return registry
        return object.__new__(cls)

    def __init__(self, base, registry):
        self._base = base
        self._registry = registry

    def _get_path(self, path):
        # mozpath.join will return a trailing slash if path is empty, and we
        # don't want that.
        return mozpath.join(self._base, path) if path else self._base

    def add(self, path, content):
        return self._registry.add(self._get_path(path), content)

    def match(self, pattern):
        return [
            mozpath.relpath(p, self._base)
            for p in self._registry.match(self._get_path(pattern))
        ]

    def remove(self, pattern):
        return self._registry.remove(self._get_path(pattern))

    def paths(self):
        return [p for p, f in self]

    def __len__(self):
        return len(self.paths())

    def contains(self, pattern):
        return self._registry.contains(self._get_path(pattern))

    def __getitem__(self, path):
        return self._registry[self._get_path(path)]

    def __iter__(self):
        # Only yield entries located under the base, with paths made
        # relative to it.
        for p, f in self._registry:
            if mozpath.basedir(p, [self._base]):
                yield mozpath.relpath(p, self._base), f


class FileCopyResult:
    """Represents results of a FileCopier.copy operation."""

    def __init__(self):
        # Destination paths for which file.copy() returned a true value.
        self.updated_files = set()
        # Destination paths for which file.copy() returned a false value.
        self.existing_files = set()
        # Paths removed from the destination (including directory symlinks).
        self.removed_files = set()
        # Directories removed from the destination.
        self.removed_directories = set()

    @property
    def updated_files_count(self):
        return len(self.updated_files)

    @property
    def existing_files_count(self):
        return len(self.existing_files)

    @property
    def removed_files_count(self):
        return len(self.removed_files)

    @property
    def removed_directories_count(self):
        return len(self.removed_directories)


class FileCopier(FileRegistry):
    """
    FileRegistry with the ability to copy the registered files to a separate
    directory.
    """

    def copy(
        self,
        destination,
        skip_if_older=True,
        remove_unaccounted=True,
        remove_all_directory_symlinks=True,
        remove_empty_directories=True,
    ):
        """
        Copy all registered files to the given destination path. The given
        destination can be an existing directory, or not exist at all. It
        can't be e.g. a file.
        The copy process acts a bit like rsync: files are not copied when they
        don't need to (see mozpack.files for details on file.copy).

        By default, files in the destination directory that aren't
        registered are removed and empty directories are deleted. In
        addition, all directory symlinks in the destination directory
        are deleted: this is a conservative approach to ensure that we
        never accidentally write files into a directory that is not the
        destination directory. In the worst case, we might have a
        directory symlink in the object directory to the source
        directory.

        To disable removing of unregistered files, pass
        remove_unaccounted=False. To disable removing empty
        directories, pass remove_empty_directories=False. In rare
        cases, you might want to maintain directory symlinks in the
        destination directory (at least those that are not required to
        be regular directories): pass
        remove_all_directory_symlinks=False. Exercise caution with
        this flag: you almost certainly do not want to preserve
        directory symlinks.

        remove_unaccounted may also be a FileRegistry. In that case its
        paths and required directories are taken as the existing content of
        the destination instead of walking the destination directory, and
        remove_all_directory_symlinks is not consulted for directories that
        are not required.

        skip_if_older is passed through to each file's copy() method.

        Returns a FileCopyResult that details what changed.
        """
        assert isinstance(destination, str)
        assert not os.path.exists(destination) or os.path.isdir(destination)

        result = FileCopyResult()
        have_symlinks = hasattr(os, "symlink")
        destination = os.path.normpath(destination)

        # We create the destination directory specially. We can't do this as
        # part of the loop doing mkdir() below because that loop munges
        # symlinks and permissions and parent directories of the destination
        # directory may have their own weird schema. The contract is we only
        # manage children of destination, not its parents.
        os.makedirs(destination, exist_ok=True)

        # Because we could be handling thousands of files, code in this
        # function is optimized to minimize system calls. We prefer CPU time
        # in Python over possibly I/O bound filesystem calls to stat() and
        # friends.

        required_dirs = {destination}
        required_dirs |= {
            os.path.normpath(os.path.join(destination, d))
            for d in self.required_directories()
        }

        # Ensure destination directories are in place and proper.
        #
        # The "proper" bit is important. We need to ensure that directories
        # have appropriate permissions or we will be unable to discover
        # and write files. Furthermore, we need to verify directories aren't
        # symlinks.
        #
        # Symlinked directories (a symlink whose target is a directory) are
        # incompatible with us because our manifest talks in terms of files,
        # not directories. If we leave symlinked directories unchecked, we
        # would blindly follow symlinks and this might confuse file
        # installation. For example, if an existing directory is a symlink
        # to directory X and we attempt to install a symlink in this directory
        # to a file in directory X, we may create a recursive symlink!
        #
        # Shortest paths first, so parents are handled before their children.
        for d in sorted(required_dirs, key=len):
            os.makedirs(d, exist_ok=True)

            # We allow the destination to be a symlink because the caller
            # is responsible for managing the destination and we assume
            # they know what they are doing.
            if have_symlinks and d != destination:
                st = os.lstat(d)
                if stat.S_ISLNK(st.st_mode):
                    # While we have remove_unaccounted, it doesn't apply
                    # to directory symlinks because if it did, our behavior
                    # could be very wrong.
                    os.remove(d)
                    os.mkdir(d)

            if not os.access(d, os.W_OK):
                # os.umask() both sets the mask and returns the previous
                # one, so set a temporary value to read it, then restore it.
                umask = os.umask(0o077)
                os.umask(umask)
                os.chmod(d, 0o777 & ~umask)

        if isinstance(remove_unaccounted, FileRegistry):
            existing_files = {
                os.path.normpath(os.path.join(destination, p))
                for p in remove_unaccounted.paths()
            }
            existing_dirs = {
                os.path.normpath(os.path.join(destination, p))
                for p in remove_unaccounted.required_directories()
            }
            existing_dirs |= {os.path.normpath(destination)}
        else:
            # While we have remove_unaccounted, it doesn't apply to empty
            # directories because it wouldn't make sense: an empty directory
            # is empty, so removing it should have no effect.
            existing_dirs = set()
            existing_files = set()
            for root, dirs, files in os.walk(destination):
                # We need to perform the same symlink detection as above.
                # os.walk() doesn't follow symlinks into directories by
                # default, so we need to check dirs (we can't wait for root).
                if have_symlinks:
                    filtered = []
                    for d in dirs:
                        full = os.path.join(root, d)
                        st = os.lstat(full)
                        if stat.S_ISLNK(st.st_mode):
                            # This directory symlink is not a required
                            # directory: any such symlink would have been
                            # removed and a directory created above.
                            if remove_all_directory_symlinks:
                                os.remove(full)
                                result.removed_files.add(os.path.normpath(full))
                            else:
                                existing_files.add(os.path.normpath(full))
                        else:
                            filtered.append(d)

                    # In-place assignment so os.walk() sees the pruned list.
                    dirs[:] = filtered

                existing_dirs.add(os.path.normpath(root))

                for d in dirs:
                    existing_dirs.add(os.path.normpath(os.path.join(root, d)))

                for f in files:
                    existing_files.add(os.path.normpath(os.path.join(root, f)))

        # Now we reconcile the state of the world against what we want.
        dest_files = set()

        # Install files.
        #
        # Creating/appending new files on Windows/NTFS is slow. So we use a
        # thread pool to speed it up significantly. The performance of this
        # loop is so critical to common build operations on Linux that the
        # overhead of the thread pool is worth avoiding, so we have 2 code
        # paths. We also employ a low water mark to prevent thread pool
        # creation if number of files is too small to benefit.
        copy_results = []
        if sys.platform == "win32" and len(self) > 100:
            with futures.ThreadPoolExecutor(4) as e:
                fs = []
                for p, f in self:
                    destfile = os.path.normpath(os.path.join(destination, p))
                    fs.append((destfile, e.submit(f.copy, destfile, skip_if_older)))

            # The executor has been shut down, so every future is done.
            # result() must actually be called: it yields the copy's return
            # value and re-raises any exception raised in the worker thread,
            # matching the behavior of the sequential path below.
            copy_results = [(path, future.result()) for path, future in fs]
        else:
            for p, f in self:
                destfile = os.path.normpath(os.path.join(destination, p))
                copy_results.append((destfile, f.copy(destfile, skip_if_older)))

        for destfile, copy_result in copy_results:
            dest_files.add(destfile)
            if copy_result:
                result.updated_files.add(destfile)
            else:
                result.existing_files.add(destfile)

        # Remove files no longer accounted for.
        if remove_unaccounted:
            for f in existing_files - dest_files:
                # Windows requires write access to remove files.
                if os.name == "nt" and not os.access(f, os.W_OK):
                    # It doesn't matter what we set permissions to since we
                    # will remove this file shortly.
                    os.chmod(f, 0o600)

                os.remove(f)
                result.removed_files.add(f)

        if not remove_empty_directories:
            return result

        # Figure out which directories can be removed. This is complicated
        # by the fact we optionally remove existing files. This would be easy
        # if we walked the directory tree after installing files. But, we're
        # trying to minimize system calls.

        # Start with the ideal set.
        remove_dirs = existing_dirs - required_dirs

        # Then don't remove directories if we didn't remove unaccounted files
        # and one of those files exists.
        if not remove_unaccounted:
            parents = set()
            pathsep = os.path.sep
            for f in existing_files:
                path = f
                while True:
                    # All the paths are normalized and relative by this point,
                    # so os.path.dirname would only do extra work.
                    dirname = path.rpartition(pathsep)[0]
                    # Once a directory is known, so are all of its ancestors.
                    if dirname in parents:
                        break
                    parents.add(dirname)
                    path = dirname
            remove_dirs -= parents

        # Remove empty directories that aren't required. Longest paths first,
        # so children are removed before their parents.
        for d in sorted(remove_dirs, key=len, reverse=True):
            try:
                try:
                    os.rmdir(d)
                except OSError as e:
                    if e.errno in (errno.EPERM, errno.EACCES):
                        # Permissions may not allow deletion. So ensure write
                        # access is in place before attempting to rmdir again.
                        os.chmod(d, 0o700)
                        os.rmdir(d)
                    else:
                        raise
            except OSError as e:
                # If remove_unaccounted is a FileRegistry, then we have a
                # list of directories that may not be empty, so ignore rmdir
                # ENOTEMPTY errors for them.
                if (
                    isinstance(remove_unaccounted, FileRegistry)
                    and e.errno == errno.ENOTEMPTY
                ):
                    continue
                raise
            result.removed_directories.add(d)

        return result


class Jarrer(FileRegistry, BaseFile):
    """
    FileRegistry with the ability to copy and pack the registered files as a
    jar file. Also acts as a BaseFile instance, to be copied with a FileCopier.
    """

    def __init__(self, compress=True):
        """
        Create a Jarrer instance. See mozpack.mozjar.JarWriter documentation
        for details on the compress argument.
        """
        self.compress = compress
        self._preload = []
        self._compress_options = {}  # Map path to compress boolean option.
        FileRegistry.__init__(self)

    def add(self, path, content, compress=None):
        """
        Add a BaseFile instance under the given path. When compress is not
        None, it overrides the Jarrer-wide compress setting for that path.
        """
        FileRegistry.add(self, path, content)
        if compress is not None:
            self._compress_options[path] = compress

    def copy(self, dest, skip_if_older=True):
        """
        Pack all registered files in the given destination jar. The given
        destination jar may be a path to jar file, or a Dest instance for
        a jar file.
        If the destination jar file exists, its (compressed) contents are used
        instead of the registered BaseFile instances when appropriate.
        """

        class DeflaterDest(Dest):
            """
            Dest-like class, reading from a file-like object initially, but
            switching to a Deflater object if written to.

                dest = DeflaterDest(original_file)
                dest.read()      # Reads original_file
                dest.write(data) # Creates a Deflater and write data there
                dest.read()      # Re-opens the Deflater and reads from it
            """

            def __init__(self, orig=None, compress=True):
                self.mode = None
                self.deflater = orig
                self.compress = compress

            def read(self, length=-1):
                if self.mode != "r":
                    # NOTE(review): this assert fails when read() follows
                    # write() (mode == "w"), which contradicts the last line
                    # of the class docstring example. Confirm the intended
                    # behavior.
                    assert self.mode is None
                    self.mode = "r"
                return self.deflater.read(length)

            def write(self, data):
                if self.mode != "w":
                    from mozpack.mozjar import Deflater

                    # The first write discards the original content, if any.
                    self.deflater = Deflater(self.compress)
                    self.mode = "w"
                self.deflater.write(data)

            def exists(self):
                return self.deflater is not None

        if isinstance(dest, str):
            dest = Dest(dest)
        assert isinstance(dest, Dest)

        from mozpack.mozjar import JarReader, JarWriter

        # Best effort: when the destination can't be read as a jar, proceed
        # as if there were no previous contents.
        try:
            old_jar = JarReader(fileobj=dest)
        except Exception:
            old_jar = []

        old_contents = {f.filename: f for f in old_jar}

        with JarWriter(fileobj=dest, compress=self.compress) as jar:
            for path, file in self:
                compress = self._compress_options.get(path, self.compress)
                if path in old_contents:
                    deflater = DeflaterDest(old_contents[path], compress)
                else:
                    deflater = DeflaterDest(compress=compress)
                file.copy(deflater, skip_if_older)
                jar.add(path, deflater.deflater, mode=file.mode, compress=compress)
            if self._preload:
                jar.preload(self._preload)

    def open(self):
        raise RuntimeError("unsupported")

    def preload(self, paths):
        """
        Add the given set of paths to the list of preloaded files. See
        mozpack.mozjar.JarWriter documentation for details on jar preloading.
        """
        self._preload.extend(paths)