tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

leb128iterator.h (8036B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 // LEB128 utilities that can read/write unsigned LEB128 numbers from/to
      8 // iterators.
      9 //
     10 // LEB128 = Little Endian Base 128, where small numbers take few bytes, but
     11 // large numbers are still allowed, which is ideal when serializing numbers that
     12 // are likely to be small.
     13 // Each byte contains 7 bits from the number, starting at the "little end", the
     14 // top bit is 0 for the last byte, 1 otherwise.
     15 // Numbers 0-127 only take 1 byte. 128-16383 take 2 bytes. Etc.
     16 //
     17 // Iterators only need to provide:
     18 // - `*it` to return a reference to the next byte to be read from or written to.
     19 // - `++it` to advance the iterator after a byte is written.
     20 //
     21 // The caller must always provide sufficient space to write any number, by:
     22 // - pre-allocating a large enough buffer, or
     23 // - allocating more space when `++it` reaches the end and/or `*it` is invoked
     24 //   after the end, or
     25 // - moving the underlying pointer to an appropriate location (e.g., wrapping
     26 //   around a circular buffer).
     27 // The caller must also provide enough bytes to read a full value (i.e., at
     28 // least one byte should have its top bit unset), and a type large enough to
     29 // hold the stored value.
     30 //
     31 // Note: There are insufficient checks for validity! These functions are
     32 // intended to be used together, i.e., the user should only `ReadULEB128()` from
     33 // a sufficiently-large buffer that the same user filled with `WriteULEB128()`.
     34 // Using with externally-sourced data (e.g., DWARF) is *not* recommended.
     35 //
     36 // https://en.wikipedia.org/wiki/LEB128
     37 
     38 #ifndef leb128iterator_h
     39 #define leb128iterator_h
     40 
     41 #include "mozilla/Assertions.h"
     42 #include "mozilla/Likely.h"
     43 
     44 #include <climits>
     45 #include <cstdint>
     46 #include <limits>
     47 #include <type_traits>
     48 
     49 namespace mozilla {
     50 
     51 // Number of bytes needed to represent `aValue`.
     52 template <typename T>
     53 constexpr uint_fast8_t ULEB128Size(T aValue) {
     54  static_assert(!std::numeric_limits<T>::is_signed,
     55                "ULEB128Size only takes unsigned types");
     56  // We need one output byte per 7 bits of non-zero value. So we just remove
     57  // 7 least significant bits at a time until the value becomes zero.
     58  // Note the special case of 0, which still needs 1 output byte; this is done
     59  // by starting the first loop before we check for 0.
     60  uint_fast8_t size = 0;
     61  for (;;) {
     62    size += 1;
     63    aValue >>= 7;
     64    // Expecting small values, so it should be more likely that `aValue == 0`.
     65    if (MOZ_LIKELY(aValue == 0)) {
     66      return size;
     67    }
     68  }
     69 }
     70 
     71 // Maximum number of bytes needed to represent any value of type `T`.
     72 template <typename T>
     73 constexpr uint_fast8_t ULEB128MaxSize() {
     74  return ULEB128Size<T>(std::numeric_limits<T>::max());
     75 }
     76 
     77 // Write `aValue` in LEB128 to `aIterator`.
     78 // The iterator will be moved past the last byte.
     79 template <typename T, typename It>
     80 void WriteULEB128(T aValue, It& aIterator) {
     81  static_assert(!std::numeric_limits<T>::is_signed,
     82                "WriteULEB128 only takes unsigned types");
     83  using IteratorValue = std::remove_reference_t<decltype(*aIterator)>;
     84  static_assert(sizeof(IteratorValue) == 1,
     85                "WriteULEB128 expects an iterator to single bytes");
     86  // 0. Don't test for 0 yet, as we want to output one byte for it.
     87  for (;;) {
     88    // 1. Extract the 7 least significant bits.
     89    const uint_fast8_t byte = aValue & 0x7Fu;
     90    // 2. Remove them from `aValue`.
     91    aValue >>= 7;
     92    // 3. Write the 7 bits, and set the 8th bit if `aValue` is not 0 yet
     93    // (meaning there will be more bytes after this one.)
     94    // Expecting small values, so it should be more likely that `aValue == 0`.
     95    // Note: No absolute need to force-cast to IteratorValue, because we have
     96    // only changed the bottom 8 bits above. However the compiler could warn
     97    // about a narrowing conversion from potentially-multibyte uint_fast8_t down
     98    // to whatever single-byte type `*iterator* expects, so we make it explicit.
     99    *aIterator = static_cast<IteratorValue>(
    100        MOZ_LIKELY(aValue == 0) ? byte : (byte | 0x80u));
    101    // 4. Always advance the iterator to the next byte.
    102    ++aIterator;
    103    // 5. We're done if `aValue` is 0.
    104    // Expecting small values, so it should be more likely that `aValue == 0`.
    105    if (MOZ_LIKELY(aValue == 0)) {
    106      return;
    107    }
    108  }
    109 }
    110 
    111 // Read an LEB128 value from `aIterator`.
    112 // The iterator will be moved past the last byte.
    113 template <typename T, typename It>
    114 T ReadULEB128(It& aIterator) {
    115  static_assert(!std::numeric_limits<T>::is_signed,
    116                "ReadULEB128 must return an unsigned type");
    117  using IteratorValue = std::remove_reference_t<decltype(*aIterator)>;
    118  static_assert(sizeof(IteratorValue) == 1,
    119                "ReadULEB128 expects an iterator to single bytes");
    120  // Incoming bits will be added to `result`...
    121  T result = 0;
    122  // ... starting with the least significant bits.
    123  uint_fast8_t shift = 0;
    124  for (;;) {
    125    // 1. Read one byte from the iterator.
    126    // `static_cast` just in case IteratorValue is not implicitly convertible to
    127    // uint_fast8_t. It wouldn't matter if the sign was extended, we're only
    128    // dealing with the bottom 8 bits below.
    129    const uint_fast8_t byte = static_cast<uint_fast8_t>(*aIterator);
    130    // 2. Always advance the iterator.
    131    ++aIterator;
    132    // 3. Extract the 7 bits of value, and shift them in place into `result`.
    133    result |= static_cast<T>(byte & 0x7fu) << shift;
    134    // 4. If the 8th bit is *not* set, this was the last byte.
    135    // Expecting small values, so it should be more likely that the bit is off.
    136    if (MOZ_LIKELY((byte & 0x80u) == 0)) {
    137      return result;
    138    }
    139    // There are more bytes to read.
    140    // 5. Next byte will contain more significant bits above the past 7.
    141    shift += 7;
    142    // Safety check that we're not going to shift by >= than the type size,
    143    // which is Undefined Behavior in C++.
    144    MOZ_ASSERT(shift < CHAR_BIT * sizeof(T));
    145  }
    146 }
    147 
    148 // constexpr ULEB128 reader class.
    149 // Mostly useful when dealing with non-trivial byte feeds.
    150 template <typename T>
    151 class ULEB128Reader {
    152  static_assert(!std::numeric_limits<T>::is_signed,
    153                "ULEB128Reader must handle an unsigned type");
    154 
    155 public:
    156  constexpr ULEB128Reader() = default;
    157 
    158  // Don't allow copy/assignment, it doesn't make sense for a stateful parser.
    159  constexpr ULEB128Reader(const ULEB128Reader&) = delete;
    160  constexpr ULEB128Reader& operator=(const ULEB128Reader&) = delete;
    161 
    162  // Feed a byte into the parser.
    163  // Returns true if this was the last byte.
    164  [[nodiscard]] constexpr bool FeedByteIsComplete(unsigned aByte) {
    165    MOZ_ASSERT(!IsComplete());
    166    // Extract the 7 bits of value, and shift them in place into the value.
    167    mValue |= static_cast<T>(aByte & 0x7fu) << mShift;
    168    // If the 8th bit is *not* set, this was the last byte.
    169    // Expecting small values, so it should be more likely that the bit is off.
    170    if (MOZ_LIKELY((aByte & 0x80u) == 0)) {
    171      mShift = mCompleteShift;
    172      return true;
    173    }
    174    // There are more bytes to read.
    175    // Next byte will contain more significant bits above the past 7.
    176    mShift += 7;
    177    // Safety check that we're not going to shift by >= than the type size,
    178    // which is Undefined Behavior in C++.
    179    MOZ_ASSERT(mShift < CHAR_BIT * sizeof(T));
    180    return false;
    181  }
    182 
    183  constexpr void Reset() {
    184    mValue = 0;
    185    mShift = 0;
    186  }
    187 
    188  [[nodiscard]] constexpr bool IsComplete() const {
    189    return mShift == mCompleteShift;
    190  }
    191 
    192  [[nodiscard]] constexpr T Value() const {
    193    MOZ_ASSERT(IsComplete());
    194    return mValue;
    195  }
    196 
    197 private:
    198  // Special value of `mShift` indicating that parsing is complete.
    199  constexpr static unsigned mCompleteShift = 0x10000u;
    200 
    201  T mValue = 0;
    202  unsigned mShift = 0;
    203 };
    204 
    205 }  // namespace mozilla
    206 
    207 #endif  // leb128iterator_h