Compression.h (4482B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #ifndef vm_Compression_h 8 #define vm_Compression_h 9 10 #include <zlib.h> 11 12 #include "jstypes.h" 13 14 #include "js/AllocPolicy.h" 15 #include "js/Vector.h" 16 17 namespace js { 18 19 struct CompressedDataHeader { 20 uint32_t compressedBytes; 21 }; 22 23 class Compressor { 24 public: 25 // After compressing CHUNK_SIZE bytes, we will do a full flush so we can 26 // start decompression at that point. 27 static constexpr size_t CHUNK_SIZE = 64 * 1024; 28 29 private: 30 // Number of bytes we should hand to zlib each compressMore() call. 31 static constexpr size_t MAX_INPUT_SIZE = 2 * 1024; 32 33 z_stream zs; 34 const unsigned char* inp = nullptr; 35 size_t inplen = 0; 36 size_t outbytes = 0; 37 bool finished = false; 38 39 // True if |zs| has been initialized by |init|. 40 bool initialized = false; 41 42 // Flag set to |false| after the first call to |setInput|. 43 bool isFirstInput = true; 44 45 // The number of uncompressed bytes written for the current chunk. When this 46 // reaches CHUNK_SIZE, we finish the current chunk and start a new chunk. 47 uint32_t currentChunkSize = 0; 48 49 // At the end of each chunk (and the end of the uncompressed data if it's 50 // not a chunk boundary), we record the offset in the compressed data. 51 js::Vector<uint32_t, 8, SystemAllocPolicy> chunkOffsets; 52 53 public: 54 enum Status { MOREOUTPUT, DONE, CONTINUE, OOM }; 55 56 Compressor(); 57 ~Compressor(); 58 59 Compressor(const Compressor&) = delete; 60 void operator=(const Compressor&) = delete; 61 62 // This should be called once per Compressor, before calling setInput. 63 [[nodiscard]] bool init(); 64 65 // setInput can be called more than once, to compress multiple strings with 66 // minimal overhead. This will reset the compressor's state. 67 [[nodiscard]] bool setInput(const unsigned char* input, size_t inputLength); 68 69 void setOutput(unsigned char* out, size_t outlen); 70 /* Compress some of the input. Return true if it should be called again. */ 71 Status compressMore(); 72 size_t sizeOfChunkOffsets() const { 73 return chunkOffsets.length() * sizeof(chunkOffsets[0]); 74 } 75 76 // Returns the number of bytes needed to store the data currently written + 77 // the chunk offsets. 78 size_t totalBytesNeeded() const; 79 80 // Append the chunk offsets to |dest|. 81 void finish(char* dest, size_t destBytes); 82 83 static void rangeToChunkAndOffset(size_t uncompressedStart, 84 size_t uncompressedLimit, 85 size_t* firstChunk, 86 size_t* firstChunkOffset, 87 size_t* firstChunkSize, size_t* lastChunk, 88 size_t* lastChunkSize) { 89 *firstChunk = uncompressedStart / CHUNK_SIZE; 90 *firstChunkOffset = uncompressedStart % CHUNK_SIZE; 91 *firstChunkSize = CHUNK_SIZE - *firstChunkOffset; 92 93 MOZ_ASSERT(uncompressedStart < uncompressedLimit, 94 "subtraction below requires a non-empty range"); 95 96 *lastChunk = (uncompressedLimit - 1) / CHUNK_SIZE; 97 *lastChunkSize = ((uncompressedLimit - 1) % CHUNK_SIZE) + 1; 98 } 99 100 static size_t chunkSize(size_t uncompressedBytes, size_t chunk) { 101 MOZ_ASSERT(uncompressedBytes > 0, "must have uncompressed data to chunk"); 102 103 size_t startOfChunkBytes = chunk * CHUNK_SIZE; 104 MOZ_ASSERT(startOfChunkBytes < uncompressedBytes, 105 "chunk must refer to bytes not exceeding " 106 "|uncompressedBytes|"); 107 108 size_t remaining = uncompressedBytes - startOfChunkBytes; 109 return remaining < CHUNK_SIZE ? remaining : CHUNK_SIZE; 110 } 111 }; 112 113 /* 114 * Decompress a string. The caller must know the length of the output and 115 * allocate |out| to a string of that length. 116 */ 117 bool DecompressString(const unsigned char* inp, size_t inplen, 118 unsigned char* out, size_t outlen); 119 120 /* 121 * Decompress a single chunk of at most Compressor::CHUNK_SIZE bytes. 122 * |chunk| is the chunk index. The caller must know the length of the output 123 * (the uncompressed chunk) and allocate |out| to a string of that length. 124 */ 125 bool DecompressStringChunk(const unsigned char* inp, size_t chunk, 126 unsigned char* out, size_t outlen); 127 128 } /* namespace js */ 129 130 #endif /* vm_Compression_h */