leb128iterator.h (8036B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 // LEB128 utilities that can read/write unsigned LEB128 numbers from/to 8 // iterators. 9 // 10 // LEB128 = Little Endian Base 128, where small numbers take few bytes, but 11 // large numbers are still allowed, which is ideal when serializing numbers that 12 // are likely to be small. 13 // Each byte contains 7 bits from the number, starting at the "little end", the 14 // top bit is 0 for the last byte, 1 otherwise. 15 // Numbers 0-127 only take 1 byte. 128-16383 take 2 bytes. Etc. 16 // 17 // Iterators only need to provide: 18 // - `*it` to return a reference to the next byte to be read from or written to. 19 // - `++it` to advance the iterator after a byte is written. 20 // 21 // The caller must always provide sufficient space to write any number, by: 22 // - pre-allocating a large enough buffer, or 23 // - allocating more space when `++it` reaches the end and/or `*it` is invoked 24 // after the end, or 25 // - moving the underlying pointer to an appropriate location (e.g., wrapping 26 // around a circular buffer). 27 // The caller must also provide enough bytes to read a full value (i.e., at 28 // least one byte should have its top bit unset), and a type large enough to 29 // hold the stored value. 30 // 31 // Note: There are insufficient checks for validity! These functions are 32 // intended to be used together, i.e., the user should only `ReadULEB128()` from 33 // a sufficiently-large buffer that the same user filled with `WriteULEB128()`. 34 // Using with externally-sourced data (e.g., DWARF) is *not* recommended. 35 // 36 // https://en.wikipedia.org/wiki/LEB128 37 38 #ifndef leb128iterator_h 39 #define leb128iterator_h 40 41 #include "mozilla/Assertions.h" 42 #include "mozilla/Likely.h" 43 44 #include <climits> 45 #include <cstdint> 46 #include <limits> 47 #include <type_traits> 48 49 namespace mozilla { 50 51 // Number of bytes needed to represent `aValue`. 52 template <typename T> 53 constexpr uint_fast8_t ULEB128Size(T aValue) { 54 static_assert(!std::numeric_limits<T>::is_signed, 55 "ULEB128Size only takes unsigned types"); 56 // We need one output byte per 7 bits of non-zero value. So we just remove 57 // 7 least significant bits at a time until the value becomes zero. 58 // Note the special case of 0, which still needs 1 output byte; this is done 59 // by starting the first loop before we check for 0. 60 uint_fast8_t size = 0; 61 for (;;) { 62 size += 1; 63 aValue >>= 7; 64 // Expecting small values, so it should be more likely that `aValue == 0`. 65 if (MOZ_LIKELY(aValue == 0)) { 66 return size; 67 } 68 } 69 } 70 71 // Maximum number of bytes needed to represent any value of type `T`. 72 template <typename T> 73 constexpr uint_fast8_t ULEB128MaxSize() { 74 return ULEB128Size<T>(std::numeric_limits<T>::max()); 75 } 76 77 // Write `aValue` in LEB128 to `aIterator`. 78 // The iterator will be moved past the last byte. 79 template <typename T, typename It> 80 void WriteULEB128(T aValue, It& aIterator) { 81 static_assert(!std::numeric_limits<T>::is_signed, 82 "WriteULEB128 only takes unsigned types"); 83 using IteratorValue = std::remove_reference_t<decltype(*aIterator)>; 84 static_assert(sizeof(IteratorValue) == 1, 85 "WriteULEB128 expects an iterator to single bytes"); 86 // 0. Don't test for 0 yet, as we want to output one byte for it. 87 for (;;) { 88 // 1. Extract the 7 least significant bits. 89 const uint_fast8_t byte = aValue & 0x7Fu; 90 // 2. Remove them from `aValue`. 91 aValue >>= 7; 92 // 3. Write the 7 bits, and set the 8th bit if `aValue` is not 0 yet 93 // (meaning there will be more bytes after this one.) 94 // Expecting small values, so it should be more likely that `aValue == 0`. 95 // Note: No absolute need to force-cast to IteratorValue, because we have 96 // only changed the bottom 8 bits above. However the compiler could warn 97 // about a narrowing conversion from potentially-multibyte uint_fast8_t down 98 // to whatever single-byte type `*iterator* expects, so we make it explicit. 99 *aIterator = static_cast<IteratorValue>( 100 MOZ_LIKELY(aValue == 0) ? byte : (byte | 0x80u)); 101 // 4. Always advance the iterator to the next byte. 102 ++aIterator; 103 // 5. We're done if `aValue` is 0. 104 // Expecting small values, so it should be more likely that `aValue == 0`. 105 if (MOZ_LIKELY(aValue == 0)) { 106 return; 107 } 108 } 109 } 110 111 // Read an LEB128 value from `aIterator`. 112 // The iterator will be moved past the last byte. 113 template <typename T, typename It> 114 T ReadULEB128(It& aIterator) { 115 static_assert(!std::numeric_limits<T>::is_signed, 116 "ReadULEB128 must return an unsigned type"); 117 using IteratorValue = std::remove_reference_t<decltype(*aIterator)>; 118 static_assert(sizeof(IteratorValue) == 1, 119 "ReadULEB128 expects an iterator to single bytes"); 120 // Incoming bits will be added to `result`... 121 T result = 0; 122 // ... starting with the least significant bits. 123 uint_fast8_t shift = 0; 124 for (;;) { 125 // 1. Read one byte from the iterator. 126 // `static_cast` just in case IteratorValue is not implicitly convertible to 127 // uint_fast8_t. It wouldn't matter if the sign was extended, we're only 128 // dealing with the bottom 8 bits below. 129 const uint_fast8_t byte = static_cast<uint_fast8_t>(*aIterator); 130 // 2. Always advance the iterator. 131 ++aIterator; 132 // 3. Extract the 7 bits of value, and shift them in place into `result`. 133 result |= static_cast<T>(byte & 0x7fu) << shift; 134 // 4. If the 8th bit is *not* set, this was the last byte. 135 // Expecting small values, so it should be more likely that the bit is off. 136 if (MOZ_LIKELY((byte & 0x80u) == 0)) { 137 return result; 138 } 139 // There are more bytes to read. 140 // 5. Next byte will contain more significant bits above the past 7. 141 shift += 7; 142 // Safety check that we're not going to shift by >= than the type size, 143 // which is Undefined Behavior in C++. 144 MOZ_ASSERT(shift < CHAR_BIT * sizeof(T)); 145 } 146 } 147 148 // constexpr ULEB128 reader class. 149 // Mostly useful when dealing with non-trivial byte feeds. 150 template <typename T> 151 class ULEB128Reader { 152 static_assert(!std::numeric_limits<T>::is_signed, 153 "ULEB128Reader must handle an unsigned type"); 154 155 public: 156 constexpr ULEB128Reader() = default; 157 158 // Don't allow copy/assignment, it doesn't make sense for a stateful parser. 159 constexpr ULEB128Reader(const ULEB128Reader&) = delete; 160 constexpr ULEB128Reader& operator=(const ULEB128Reader&) = delete; 161 162 // Feed a byte into the parser. 163 // Returns true if this was the last byte. 164 [[nodiscard]] constexpr bool FeedByteIsComplete(unsigned aByte) { 165 MOZ_ASSERT(!IsComplete()); 166 // Extract the 7 bits of value, and shift them in place into the value. 167 mValue |= static_cast<T>(aByte & 0x7fu) << mShift; 168 // If the 8th bit is *not* set, this was the last byte. 169 // Expecting small values, so it should be more likely that the bit is off. 170 if (MOZ_LIKELY((aByte & 0x80u) == 0)) { 171 mShift = mCompleteShift; 172 return true; 173 } 174 // There are more bytes to read. 175 // Next byte will contain more significant bits above the past 7. 176 mShift += 7; 177 // Safety check that we're not going to shift by >= than the type size, 178 // which is Undefined Behavior in C++. 179 MOZ_ASSERT(mShift < CHAR_BIT * sizeof(T)); 180 return false; 181 } 182 183 constexpr void Reset() { 184 mValue = 0; 185 mShift = 0; 186 } 187 188 [[nodiscard]] constexpr bool IsComplete() const { 189 return mShift == mCompleteShift; 190 } 191 192 [[nodiscard]] constexpr T Value() const { 193 MOZ_ASSERT(IsComplete()); 194 return mValue; 195 } 196 197 private: 198 // Special value of `mShift` indicating that parsing is complete. 199 constexpr static unsigned mCompleteShift = 0x10000u; 200 201 T mValue = 0; 202 unsigned mShift = 0; 203 }; 204 205 } // namespace mozilla 206 207 #endif // leb128iterator_h