nsZipArchive.h (12064B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #ifndef nsZipArchive_h_ 7 #define nsZipArchive_h_ 8 9 #define ZIP_TABSIZE 256 10 #define ZIP_BUFLEN \ 11 (4 * 1024) /* Used as output buffer when deflating items to a file */ 12 13 #include "zlib.h" 14 #include "zipstruct.h" 15 #include "nsIFile.h" 16 #include "nsISupportsImpl.h" // For mozilla::ThreadSafeAutoRefCnt 17 #include "mozilla/ArenaAllocator.h" 18 #include "mozilla/FileUtils.h" 19 #include "mozilla/FileLocation.h" 20 #include "mozilla/Mutex.h" 21 #include "mozilla/UniquePtr.h" 22 23 class nsZipFind; 24 struct PRFileDesc; 25 26 /** 27 * This file defines some of the basic structures used by libjar to 28 * read Zip files. It makes use of zlib in order to do the decompression. 29 * 30 * A few notes on the classes/structs: 31 * nsZipArchive represents a single Zip file, and maintains an index 32 * of all the items in the file. 33 * nsZipItem represents a single item (file) in the Zip archive. 34 * nsZipFind represents the metadata involved in doing a search, 35 * and current state of the iteration of found objects. 36 * 'MT''safe' reading from the zipfile is performed through JARInputStream, 37 * which maintains its own file descriptor, allowing for multiple reads 38 * concurrently from the same zip file. 39 * 40 * nsZipArchives are accessed from multiple threads. 41 */ 42 43 /** 44 * nsZipItem -- a helper struct for nsZipArchive 45 * 46 * each nsZipItem represents one file in the archive and all the 47 * information needed to manipulate it. 48 */ 49 class nsZipItem final { 50 public: 51 nsZipItem(); 52 53 const char* Name() { return ((const char*)central) + ZIPCENTRAL_SIZE; } 54 55 uint32_t LocalOffset(); 56 uint32_t Size(); 57 uint32_t RealSize(); 58 uint32_t CRC32(); 59 uint16_t Date(); 60 uint16_t Time(); 61 uint16_t Compression(); 62 bool IsDirectory(); 63 uint16_t Mode(); 64 const uint8_t* GetExtraField(uint16_t aTag, uint16_t* aBlockSize); 65 PRTime LastModTime(); 66 67 nsZipItem* next; 68 const ZipCentral* central; 69 uint16_t nameLength; 70 bool isSynthetic; 71 }; 72 73 class nsZipHandle; 74 75 /** 76 * nsZipArchive -- a class for reading the PKZIP file format. 77 * 78 */ 79 class nsZipArchive final { 80 friend class nsZipFind; 81 82 /** destructing the object closes the archive */ 83 ~nsZipArchive(); 84 85 public: 86 static const char* sFileCorruptedReason; 87 88 /** 89 * OpenArchive 90 * 91 * @param aZipHandle The nsZipHandle used to access the zip 92 * @param aFd Optional PRFileDesc for Windows readahead optimization 93 * @return status code 94 */ 95 static already_AddRefed<nsZipArchive> OpenArchive(nsZipHandle* aZipHandle, 96 PRFileDesc* aFd = nullptr); 97 98 /** 99 * OpenArchive 100 * 101 * Convenience function that generates nsZipHandle 102 * 103 * @param aFile The file used to access the zip 104 * @return status code 105 */ 106 static already_AddRefed<nsZipArchive> OpenArchive(nsIFile* aFile); 107 108 /** 109 * Test the integrity of items in this archive by running 110 * a CRC check after extracting each item into a memory 111 * buffer. If an entry name is supplied only the 112 * specified item is tested. Else, if null is supplied 113 * then all the items in the archive are tested. 114 * 115 * @return status code 116 */ 117 nsresult Test(const nsACString& aEntryName); 118 119 /** 120 * GetItem 121 * @param aEntryName Name of file in the archive 122 * @return pointer to nsZipItem 123 */ 124 nsZipItem* GetItem(const nsACString& aEntryName); 125 126 /** 127 * ExtractFile 128 * 129 * @param zipEntry Name of file in archive to extract 130 * @param outFD Filedescriptor to write contents to 131 * @param outname Name of file to write to 132 * @return status code 133 */ 134 nsresult ExtractFile(nsZipItem* zipEntry, nsIFile* outFile, 135 PRFileDesc* outFD); 136 137 /** 138 * FindInit 139 * 140 * Initializes a search for files in the archive. FindNext() returns 141 * the actual matches. The nsZipFind must be deleted when you're done 142 * 143 * @param aPattern a string or RegExp pattern to search for 144 * (may be nullptr to find all files in archive) 145 * @param aFind a pointer to a pointer to a structure used 146 * in FindNext. In the case of an error this 147 * will be set to nullptr. 148 * @return status code 149 */ 150 nsresult FindInit(const char* aPattern, nsZipFind** aFind); 151 152 /* 153 * Gets an undependent handle to the mapped file. 154 */ 155 nsZipHandle* GetFD() const; 156 157 /** 158 * Gets the data offset. 159 * @param aItem Pointer to nsZipItem 160 * returns 0 on failure. 161 */ 162 uint32_t GetDataOffset(nsZipItem* aItem); 163 164 /** 165 * Get pointer to the data of the item. 166 * @param aItem Pointer to nsZipItem 167 * reutrns null when zip file is corrupt. 168 */ 169 const uint8_t* GetData(nsZipItem* aItem); 170 171 /** 172 * Gets the amount of memory taken up by the archive's mapping. 173 * @return the size 174 */ 175 int64_t SizeOfMapping(); 176 177 /* 178 * Refcounting 179 */ 180 NS_METHOD_(MozExternalRefCountType) AddRef(void); 181 NS_METHOD_(MozExternalRefCountType) Release(void); 182 183 private: 184 nsZipArchive(nsZipHandle* aZipHandle, PRFileDesc* aFd, nsresult& aRv); 185 186 //--- private members --- 187 mozilla::ThreadSafeAutoRefCnt mRefCnt; /* ref count */ 188 NS_DECL_OWNINGTHREAD 189 190 // These fields are all effectively const after the constructor 191 // file handle 192 const RefPtr<nsZipHandle> mFd; 193 // file URI, for logging 194 nsCString mURI; 195 // Is true if we use zipLog to log accesses in jar/zip archives. This helper 196 // variable avoids grabbing zipLog's lock when not necessary. 197 // Effectively const after constructor 198 bool mUseZipLog; 199 200 mozilla::Mutex mLock{"nsZipArchive"}; 201 // all of the following members are guarded by mLock: 202 nsZipItem* mFiles[ZIP_TABSIZE] MOZ_GUARDED_BY(mLock); 203 mozilla::ArenaAllocator<1024, sizeof(void*)> mArena MOZ_GUARDED_BY(mLock); 204 // Whether we synthesized the directory entries 205 bool mBuiltSynthetics MOZ_GUARDED_BY(mLock); 206 207 private: 208 //--- private methods --- 209 nsZipItem* CreateZipItem() MOZ_REQUIRES(mLock); 210 nsresult BuildFileList(PRFileDesc* aFd = nullptr); 211 nsresult BuildSynthetics(); 212 213 nsZipArchive& operator=(const nsZipArchive& rhs) = delete; 214 nsZipArchive(const nsZipArchive& rhs) = delete; 215 }; 216 217 /** 218 * nsZipFind 219 * 220 * a helper class for nsZipArchive, representing a search 221 */ 222 class nsZipFind final { 223 public: 224 nsZipFind(nsZipArchive* aZip, char* aPattern, bool regExp); 225 ~nsZipFind(); 226 227 nsresult FindNext(const char** aResult, uint16_t* aNameLen); 228 229 private: 230 RefPtr<nsZipArchive> mArchive; 231 char* mPattern; 232 nsZipItem* mItem; 233 uint16_t mSlot; 234 bool mRegExp; 235 236 nsZipFind& operator=(const nsZipFind& rhs) = delete; 237 nsZipFind(const nsZipFind& rhs) = delete; 238 }; 239 240 /** 241 * nsZipCursor -- a low-level class for reading the individual items in a zip. 242 */ 243 class nsZipCursor final { 244 public: 245 /** 246 * Initializes the cursor 247 * 248 * @param aItem Item of interest 249 * @param aZip Archive 250 * @param aBuf Buffer used for decompression. 251 * This determines the maximum Read() size in the 252 * compressed case. 253 * @param aBufSize Buffer size 254 * @param doCRC When set to true Read() will check crc 255 */ 256 nsZipCursor(nsZipItem* aItem, nsZipArchive* aZip, uint8_t* aBuf = nullptr, 257 uint32_t aBufSize = 0, bool doCRC = false); 258 259 ~nsZipCursor(); 260 261 /** 262 * Performs reads. In the compressed case it uses aBuf(passed in constructor), 263 * for stored files it returns a zero-copy buffer. 264 * 265 * @param aBytesRead Outparam for number of bytes read. 266 * @return data read or nullptr if item is corrupted. 267 */ 268 uint8_t* Read(uint32_t* aBytesRead) { return ReadOrCopy(aBytesRead, false); } 269 270 /** 271 * Performs a copy. It always uses aBuf(passed in constructor). 272 * 273 * @param aBytesRead Outparam for number of bytes read. 274 * @return data read or nullptr if item is corrupted. 275 */ 276 uint8_t* Copy(uint32_t* aBytesRead) { return ReadOrCopy(aBytesRead, true); } 277 278 private: 279 /* Actual implementation for both Read and Copy above */ 280 uint8_t* ReadOrCopy(uint32_t* aBytesRead, bool aCopy); 281 282 nsZipItem* mItem; 283 uint8_t* mBuf; 284 uint32_t mBufSize; 285 z_stream mZs; 286 uint32_t mCRC; 287 bool mDoCRC; 288 }; 289 290 /** 291 * nsZipItemPtr - a RAII convenience class for reading the individual items in a 292 * zip. It reads whole files and does zero-copy IO for stored files. A buffer is 293 * allocated for decompression. Do not use when the file may be very large. 294 */ 295 class nsZipItemPtr_base { 296 public: 297 /** 298 * Initializes the reader 299 * 300 * @param aZip Archive 301 * @param aEntryName Archive membername 302 * @param doCRC When set to true Read() will check crc 303 */ 304 nsZipItemPtr_base(nsZipArchive* aZip, const nsACString& aEntryName, 305 bool doCRC); 306 307 uint32_t Length() const { return mReadlen; } 308 309 protected: 310 RefPtr<nsZipHandle> mZipHandle; 311 mozilla::UniquePtr<uint8_t[]> mAutoBuf; 312 uint8_t* mReturnBuf; 313 uint32_t mReadlen; 314 }; 315 316 template <class T> 317 class nsZipItemPtr final : public nsZipItemPtr_base { 318 static_assert(sizeof(T) == sizeof(char), 319 "This class cannot be used with larger T without re-examining" 320 " a number of assumptions."); 321 322 public: 323 nsZipItemPtr(nsZipArchive* aZip, const nsACString& aEntryName, 324 bool doCRC = false) 325 : nsZipItemPtr_base(aZip, aEntryName, doCRC) {} 326 /** 327 * @return buffer containing the whole zip member or nullptr on error. 328 * The returned buffer is owned by nsZipItemReader. 329 */ 330 const T* Buffer() const { return (const T*)mReturnBuf; } 331 332 operator const T*() const { return Buffer(); } 333 334 /** 335 * Relinquish ownership of zip member if compressed. 336 * Copy member into a new buffer if uncompressed. 337 * @return a buffer with whole zip member. It is caller's responsibility to 338 * free() it. 339 */ 340 mozilla::UniquePtr<T[]> Forget() { 341 if (!mReturnBuf) return nullptr; 342 // In uncompressed mmap case, give up buffer 343 if (mAutoBuf.get() == mReturnBuf) { 344 mReturnBuf = nullptr; 345 return mozilla::UniquePtr<T[]>(reinterpret_cast<T*>(mAutoBuf.release())); 346 } 347 auto ret = mozilla::MakeUnique<T[]>(Length()); 348 memcpy(ret.get(), mReturnBuf, Length()); 349 mReturnBuf = nullptr; 350 return ret; 351 } 352 }; 353 354 class nsZipHandle final { 355 friend class nsZipArchive; 356 friend class nsZipFind; 357 friend class mozilla::FileLocation; 358 friend class nsJARInputStream; 359 #if defined(XP_UNIX) && !defined(XP_DARWIN) 360 friend class MmapAccessScope; 361 #endif 362 363 public: 364 static nsresult Init(nsIFile* file, nsZipHandle** ret, 365 PRFileDesc** aFd = nullptr); 366 static nsresult Init(nsZipArchive* zip, const nsACString& entry, 367 nsZipHandle** ret); 368 static nsresult Init(const uint8_t* aData, uint32_t aLen, nsZipHandle** aRet); 369 370 NS_METHOD_(MozExternalRefCountType) AddRef(void); 371 NS_METHOD_(MozExternalRefCountType) Release(void); 372 373 int64_t SizeOfMapping(); 374 375 nsresult GetNSPRFileDesc(PRFileDesc** aNSPRFileDesc); 376 377 protected: 378 const uint8_t* mFileData; /* pointer to zip data */ 379 uint32_t mLen; /* length of zip data */ 380 mozilla::FileLocation mFile; /* source file if any, for logging */ 381 382 private: 383 nsZipHandle(); 384 ~nsZipHandle(); 385 386 nsresult findDataStart(); 387 388 PRFileMap* mMap; /* nspr datastructure for mmap */ 389 mozilla::AutoFDClose mNSPRFileDesc; 390 mozilla::UniquePtr<nsZipItemPtr<uint8_t> > mBuf; 391 mozilla::ThreadSafeAutoRefCnt mRefCnt; /* ref count */ 392 NS_DECL_OWNINGTHREAD 393 394 const uint8_t* mFileStart; /* pointer to mmaped file */ 395 uint32_t mTotalLen; /* total length of the mmaped file */ 396 397 /* Magic number for CRX type expressed in Big Endian since it is a literal */ 398 static const uint32_t kCRXMagic = 0x34327243; 399 }; 400 401 nsresult gZlibInit(z_stream* zs); 402 403 #endif /* nsZipArchive_h_ */