macpkg.py (7615B)
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# TODO: Eventually consolidate with mozpack.pkg module. This is kept separate
# for now because of the vast difference in API, and to avoid churn for the
# users of this module (docker images, macos SDK artifacts) when changes are
# necessary in mozpack.pkg
import bz2
import concurrent.futures
import io
import lzma
import os
import struct
import zlib
from collections import deque, namedtuple
from xml.etree.ElementTree import XML


class ZlibFile:
    """
    Minimal read-only file object decompressing a zlib stream on the fly.

    Only ``read`` is provided. Decompressed bytes that were produced but not
    yet requested by the caller are kept in ``self.buf`` for the next call.
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.decompressor = zlib.decompressobj()
        self.buf = b""

    def read(self, length=None):
        """
        Return up to ``length`` decompressed bytes, or everything that is
        left when ``length`` is None. Fewer bytes (possibly none) are
        returned once the underlying file object is exhausted.
        """
        if length is None:
            # Drain whatever is pending, then the rest of the stream.
            parts = [self.buf]
            self.buf = b""
            while True:
                raw = self.fileobj.read(io.DEFAULT_BUFFER_SIZE)
                if not raw:
                    break
                parts.append(self.decompressor.decompress(raw))
            return b"".join(parts)

        # Serve from the pending buffer first.
        cutoff = min(length, len(self.buf))
        parts = [self.buf[:cutoff]]
        self.buf = self.buf[cutoff:]
        missing = length - cutoff
        # The loop is only entered when the pending buffer was fully
        # consumed, so the surplus stored below never gets out of order.
        while missing > 0:
            raw = self.fileobj.read(io.DEFAULT_BUFFER_SIZE)
            if not raw:
                break
            data = self.decompressor.decompress(raw)
            cutoff = min(missing, len(data))
            parts.append(data[:cutoff])
            self.buf += data[cutoff:]
            missing -= cutoff
        return b"".join(parts)


def unxar(fileobj):
    """
    Iterate over the regular files stored in a XAR archive.

    ``fileobj`` must be a seekable binary file object positioned at the start
    of the archive. Yields ``(filename, content)`` pairs, where ``filename``
    is the text of the entry's ``name`` element (not a full path) and
    ``content`` is an object with a ``read`` method returning the decoded
    file data.

    Every ``content`` reads from the shared ``fileobj``, which is re-seeked
    for each entry: consume a ``content`` before advancing the generator.

    Raises Exception when the archive is malformed or uses an unsupported
    version or encoding.
    """
    magic = fileobj.read(4)
    if magic != b"xar!":
        raise Exception("Not a XAR?")

    header_size = fileobj.read(2)
    if len(header_size) != 2:
        raise Exception("Failed to read XAR header")
    header_size = struct.unpack(">H", header_size)[0]
    # The fields parsed below, plus the 6 bytes already read, take 28 bytes.
    # A smaller declared size cannot hold them (and a size under 6 would
    # turn into a negative read count).
    if header_size < 28 or header_size > 64:
        raise Exception(
            f"Don't know how to handle a {header_size} bytes XAR header size"
        )
    header_size -= 6  # what we've read so far.
    header = fileobj.read(header_size)
    if len(header) != header_size:
        raise Exception("Failed to read XAR header")
    (
        version,
        compressed_toc_len,
        uncompressed_toc_len,
        checksum_type,
    ) = struct.unpack(">HQQL", header[:22])
    if version != 1:
        raise Exception(f"XAR version {version} not supported")
    toc = fileobj.read(compressed_toc_len)
    # Data offsets in the TOC are relative to the end of the compressed TOC.
    base = fileobj.tell()
    if len(toc) != compressed_toc_len:
        raise Exception("Failed to read XAR TOC")
    toc = zlib.decompress(toc)
    if len(toc) != uncompressed_toc_len:
        raise Exception("Corrupted XAR?")
    toc = XML(toc).find("toc")
    # Entries can be nested (directories contain "file" children); walk the
    # tree with an explicit stack.
    queue = deque(toc.findall("file"))
    while queue:
        f = queue.pop()
        queue.extend(f.iterfind("file"))
        if f.find("type").text != "file":
            continue
        filename = f.find("name").text
        data = f.find("data")
        length = int(data.find("length").text)
        size = int(data.find("size").text)
        offset = int(data.find("offset").text)
        encoding = data.find("encoding").get("style")
        fileobj.seek(base + offset, os.SEEK_SET)
        # Never let a reader run past the end of this entry's stored bytes.
        content = Take(fileobj, length)
        if encoding == "application/octet-stream":
            if length != size:
                raise Exception(f"{length} != {size}")
        elif encoding == "application/x-bzip2":
            content = bz2.BZ2File(content)
        elif encoding == "application/x-gzip":
            # Despite the encoding saying gzip, it is in fact, a raw zlib stream.
            content = ZlibFile(content)
        else:
            raise Exception(f"XAR encoding {encoding} not supported")

        yield filename, content


class Pbzx:
    """
    Read-only file object exposing the decompressed content of a PBZX payload.

    Chunks are decompressed on a thread pool. ``Executor.map`` collects its
    input iterable immediately, so all chunks of ``fileobj`` are read when the
    object is constructed.
    """

    def __init__(self, fileobj):
        magic = fileobj.read(4)
        if magic != b"pbzx":
            raise Exception("Not a PBZX payload?")
        # The first thing in the file looks like the size of each
        # decompressed chunk except the last one. It should match
        # decompressed_size in all cases except last, but we don't
        # check. It still has to be read (and be well-formed) to reach the
        # first chunk header.
        struct.unpack(">Q", fileobj.read(8))
        # Not using mozbuild.util.cpu_count() because this file is used standalone
        # to generate system symbols.
        executor = concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count())
        self.chunk_getter = executor.map(self._uncompress_chunk, self._chunker(fileobj))
        self._init_one_chunk()

    @staticmethod
    def _chunker(fileobj):
        """
        Yield ``(decompressed_size, compressed_size, raw_chunk)`` for each
        chunk of the payload, until the end of ``fileobj``.
        """
        while True:
            header = fileobj.read(16)
            if header == b"":
                break
            if len(header) != 16:
                raise Exception("Corrupted PBZX payload?")
            decompressed_size, compressed_size = struct.unpack(">QQ", header)
            chunk = fileobj.read(compressed_size)
            yield decompressed_size, compressed_size, chunk

    @staticmethod
    def _uncompress_chunk(data):
        """
        Return the decompressed bytes of one item produced by ``_chunker``.
        """
        decompressed_size, compressed_size, chunk = data
        # Chunks whose sizes match are stored as-is.
        if compressed_size != decompressed_size:
            chunk = lzma.decompress(chunk)
        if len(chunk) != decompressed_size:
            raise Exception("Corrupted PBZX payload?")
        return chunk

    def _init_one_chunk(self):
        """
        Make the next decompressed chunk current, or ``b""`` when there is
        none left.
        """
        self.offset = 0
        # The sentinel must be bytes so that read() never returns a str.
        self.chunk = next(self.chunk_getter, b"")

    def read(self, length=None):
        """
        Return up to ``length`` bytes, or everything left when ``length`` is
        None. Returns ``b""`` at the end of the payload.
        """
        if length == 0:
            return b""
        parts = []
        remaining = length
        # An empty current chunk marks the end of the payload.
        while self.chunk:
            available = len(self.chunk) - self.offset
            if remaining is not None and remaining <= available:
                # The current chunk can satisfy the rest of the request.
                start = self.offset
                self.offset += remaining
                parts.append(self.chunk[start : self.offset])
                break
            parts.append(self.chunk[self.offset :])
            if remaining is not None:
                remaining -= available
            self._init_one_chunk()
        return b"".join(parts)


class Take:
    """
    File object wrapper that allows to read at most a certain length.
    """

    def __init__(self, fileobj, limit):
        self.fileobj = fileobj
        # Number of bytes that may still be read through this wrapper.
        self.limit = limit

    def read(self, length=None):
        """
        Read up to ``length`` bytes (everything still allowed when None),
        never going past the remaining limit.
        """
        if length is None:
            length = self.limit
        else:
            length = min(length, self.limit)
        result = self.fileobj.read(length)
        self.limit -= len(result)
        return result


CpioInfo = namedtuple("CpioInfo", ["mode", "nlink", "dev", "ino"])


def uncpio(fileobj):
    """
    Iterate over the members of a CPIO archive in the portable ASCII format.

    Yields ``(name, info, content)`` where ``name`` is the member name as
    bytes with its leading ``./`` or ``/`` removed, ``info`` is a CpioInfo and
    ``content`` is a Take limited to the member data. Whatever the caller
    leaves unread in ``content`` is skipped when the generator is advanced.

    Raises Exception on malformed input, on an unsupported CPIO flavor, and
    when a member name has a ``..`` path component.
    """
    while True:
        magic = fileobj.read(6)
        # CPIO payloads in mac pkg files are using the portable ASCII format.
        if magic != b"070707":
            if magic.startswith(b"0707"):
                raise Exception("Unsupported CPIO format")
            raise Exception("Not a CPIO header")
        header = fileobj.read(70)
        if len(header) != 70:
            raise Exception("Corrupted CPIO header")
        (
            dev,
            ino,
            mode,
            uid,
            gid,
            nlink,
            rdev,
            mtime,
            namesize,
            filesize,
        ) = struct.unpack(">6s6s6s6s6s6s6s11s6s11s", header)
        # All the fields are octal numbers in ASCII.
        dev = int(dev, 8)
        ino = int(ino, 8)
        mode = int(mode, 8)
        nlink = int(nlink, 8)
        namesize = int(namesize, 8)
        filesize = int(filesize, 8)
        name = fileobj.read(namesize)
        # endswith also covers an empty (zero-sized or truncated) name.
        if not name.endswith(b"\0"):
            raise Exception("File name is not NUL terminated")
        name = name[:-1]
        if name == b"TRAILER!!!":
            break

        # Look at whole path components, so that a trailing ".." (as in
        # "./..") is rejected too.
        if b".." in name.split(b"/"):
            raise Exception(".. is forbidden in file name")
        # Make the name relative. Only a real "./" prefix is removed, so
        # that a dot-file stored without that prefix keeps its name.
        if name == b".":
            name = b""
        elif name.startswith(b"./"):
            name = name[2:]
        elif name.startswith(b"/"):
            name = name[1:]
        content = Take(fileobj, filesize)
        yield name, CpioInfo(mode=mode, nlink=nlink, dev=dev, ino=ino), content
        # Ensure the content is totally consumed
        while content.read(4096):
            pass