MozZipFile.py (5777B)
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import os
import time
import zipfile

try:
    from filelock import SoftFileLock
except ImportError:
    # Locking is opt-in (``lock=True``). Keep the module importable without
    # filelock and fail with a clear message only when a lock is requested.
    SoftFileLock = None


class ZipFile(zipfile.ZipFile):
    """Class with methods to open, read, write, close, list zip files.

    Subclassing zipfile.ZipFile to allow for overwriting of existing
    entries, though only for writestr, not for write.

    Overwritten entries are either replaced in place (when that is cheap) or
    remembered in ``_remove`` and squeezed out of the file by ``close()``.

    This implementation targets Python 3's ``zipfile``, which appends new
    records at ``self.start_dir`` rather than at the current file position.

    Attributes:
        lockfile: the held ``SoftFileLock`` when opened with ``lock=True``,
            otherwise ``None``. Released by ``close()``.
        end: file offset just past the last local record written through
            ``writestr()``.
    """

    def __init__(self, file, mode="r", compression=zipfile.ZIP_STORED, lock=False):
        """Open the archive, optionally guarded by a ``<file>.lck`` soft lock.

        Args:
            file: path of the archive (must be a ``str`` when ``lock`` is set).
            mode: "r", "w" or "a", as for ``zipfile.ZipFile``.
            compression: default compression for entries created by name.
            lock: when true, acquire ``SoftFileLock(file + ".lck")`` before
                touching the archive and hold it until ``close()``.

        Raises:
            TypeError: ``lock`` is set but ``file`` is not a ``str``.
            ImportError: ``lock`` is set but ``filelock`` is not installed.
        """
        # Set these first: zipfile.ZipFile.__del__ calls close(), which must
        # work even if construction fails half way through.
        self._remove = []
        self.lockfile = None

        if lock:
            if not isinstance(file, str):
                raise TypeError("lock=True requires 'file' to be a path string")
            if SoftFileLock is None:
                raise ImportError("lock=True requires the 'filelock' package")
            self.lockfile = SoftFileLock(file + ".lck")
            self.lockfile.acquire()

        try:
            if mode == "a" and lock:
                # appending to a file which doesn't exist fails, but we can't
                # check existence until we hold the lock
                if (not os.path.isfile(file)) or os.path.getsize(file) == 0:
                    mode = "w"

            zipfile.ZipFile.__init__(self, file, mode, compression)
            self.end = self.fp.tell()
        except BaseException:
            # Don't leave a stale lock behind if the archive can't be opened.
            self._release_lock()
            raise
        self.debug = 0

    def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
        """Write contents into the archive.

        The contents is the argument 'bytes', 'zinfo_or_arcname' is either
        a ZipInfo instance or the name of the file in the archive.
        This method is overloaded to allow overwriting existing entries.

        Args:
            zinfo_or_arcname: a ``ZipInfo`` or the archive member name.
            bytes: the data; ``str`` is encoded as UTF-8 like the base class
                does. (The parameter keeps its historical name although it
                shadows the builtin.)
            compress_type: optional override of the entry's compression,
                mirroring ``zipfile.ZipFile.writestr``.

        Raises:
            ValueError: the archive is closed or not opened for writing.
        """
        # Encode up front so that size comparisons below see the real payload.
        data = bytes.encode("utf-8") if isinstance(bytes, str) else bytes

        if isinstance(zinfo_or_arcname, zipfile.ZipInfo):
            zinfo = zinfo_or_arcname
        else:
            zinfo = zipfile.ZipInfo(
                filename=zinfo_or_arcname,
                date_time=time.localtime(time.time())[:6],
            )
            zinfo.compress_type = self.compression
            # Add some standard UNIX file access permissions (-rw-r--r--).
            zinfo.external_attr = (0x81A4 & 0xFFFF) << 16
        if compress_type is not None:
            zinfo.compress_type = compress_type

        # Now to the point why we overwrote this in the first place,
        # remember the entry numbers if we already had this entry.
        # Optimizations:
        # If the entry to overwrite is the last one, just reuse that.
        # If we store uncompressed and the new content has the same size
        # as the old, reuse the existing entry.
        index = self._last_index_of(zinfo.filename)
        if index is None:
            zipfile.ZipFile.writestr(self, zinfo, data)
        else:
            existing = self.filelist[index]
            is_last = index == len(self.filelist) - 1
            # NOTE(review): the same-size shortcut assumes the replacement's
            # local header is not longer than the existing one (same name
            # encoding, no additional extra field) - confirm for callers that
            # pass their own ZipInfo.
            same_size = (
                zinfo.compress_type == zipfile.ZIP_STORED
                and existing.compress_size == len(data)
            )
            if is_last or same_size:
                self._overwrite(index, zinfo, data, truncate=is_last)
            else:
                self._supersede(index, zinfo, data)

        self.filelist.sort(key=lambda entry: entry.header_offset)
        self.end = self.fp.tell()

    def _last_index_of(self, filename):
        """Return the index in ``filelist`` of the last entry named
        *filename*, or ``None`` if the archive has no such entry."""
        if filename not in self.NameToInfo:
            return None
        # Last, because that's catching multiple overwrites
        for index in range(len(self.filelist) - 1, -1, -1):
            if self.filelist[index].filename == filename:
                return index
        return None

    def _overwrite(self, index, zinfo, data, truncate):
        """Replace the record of ``filelist[index]`` where it sits in the file.

        With *truncate* the record is the last one and the file is simply cut
        off at its header; otherwise the new record is written over the old
        one and the append position is restored afterwards.
        """
        existing = self.filelist[index]
        # make sure we're allowed to write before touching the file
        self._writecheck(existing)

        resume_at = self.start_dir
        # Read the offset now: 'zinfo' may be 'existing' itself and is
        # modified by the write below.
        offset = existing.header_offset
        self.fp.seek(offset)
        if truncate:
            # this is the last item in the file, just truncate
            self.fp.truncate()
        # unhook the current zipinfo, the writestr of our superclass
        # will add a new one
        del self.filelist[index]
        self.NameToInfo.pop(existing.filename, None)
        # The base class starts every new record at start_dir.
        self.start_dir = offset
        zipfile.ZipFile.writestr(self, zinfo, data)
        if not truncate:
            # we need to move to the end of the file afterwards again
            self.fp.seek(resume_at)
            self.start_dir = resume_at

    def _supersede(self, index, zinfo, data):
        """Append the new record and mark ``filelist[index]`` for removal.

        The old entry is only unhooked after the base class has written the
        new one, so a rejected write leaves the archive state untouched.
        """
        existing = self.filelist[index]
        stale_offset = existing.header_offset
        zipfile.ZipFile.writestr(self, zinfo, data)
        if existing is zinfo:
            # The caller passed the very object being replaced and the write
            # has just moved its header_offset; remember the old location
            # with a stand-in so that close() can still drop the old record.
            existing = zipfile.ZipInfo(zinfo.filename)
            existing.header_offset = stale_offset
        # The new entry was appended, so 'index' still points at the old one.
        del self.filelist[index]
        self._remove.append(existing)

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Overwritten to compact overwritten entries. The lock taken by
        ``__init__`` (if any) is released even when closing fails.
        """
        try:
            if self._remove and self.fp is not None:
                self._compact()
            return zipfile.ZipFile.close(self)
        finally:
            self._release_lock()

    def _compact(self):
        """Squeeze the records listed in ``_remove`` out of the file.

        Kept records are moved towards the start of the file in offset order,
        and their ``header_offset`` is updated so that the central directory
        written by the base class points at the new locations.
        """
        if not (self.fp.readable() and self.fp.writable()):
            # adjust file mode if we originally just wrote, now we rewrite
            self.fp.close()
            self.fp = open(self.filename, "r+b")

        records = [(zi, True) for zi in self.filelist]
        records += [(zi, False) for zi in self._remove]
        records.sort(key=lambda record: record[0].header_offset)
        # empty _remove for multiple closes
        self._remove = []

        # A record extends up to the next record's header, the last one up
        # to where the central directory is going to be written.
        starts = [zi.header_offset for zi, _ in records]
        stops = starts[1:] + [self.start_dir]

        # Start at the first record rather than at 0, so that anything that
        # precedes the archive data in the file is left alone.
        to_pos = starts[0]
        for (zi, keep), start, stop in zip(records, starts, stops):
            if not keep:
                continue
            length = stop - start
            if start != to_pos:
                self.fp.seek(start)
                content = self.fp.read(length)
                self.fp.seek(to_pos)
                self.fp.write(content)
                zi.header_offset = to_pos
            to_pos += length

        self.fp.seek(to_pos)
        self.fp.truncate()
        # The base class writes the central directory at start_dir.
        self.start_dir = to_pos
        self.end = to_pos

    def _release_lock(self):
        """Release the soft lock, if one is held. Safe to call repeatedly."""
        held, self.lockfile = self.lockfile, None
        if held is not None:
            held.release()