tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

WasmSerialize.h (11812B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 *
      4 * Copyright 2022 Mozilla Foundation
      5 *
      6 * Licensed under the Apache License, Version 2.0 (the "License");
      7 * you may not use this file except in compliance with the License.
      8 * You may obtain a copy of the License at
      9 *
     10 *     http://www.apache.org/licenses/LICENSE-2.0
     11 *
     12 * Unless required by applicable law or agreed to in writing, software
     13 * distributed under the License is distributed on an "AS IS" BASIS,
     14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 * See the License for the specific language governing permissions and
     16 * limitations under the License.
     17 */
     18 
     19 #ifndef wasm_serialize_h
     20 #define wasm_serialize_h
     21 
     22 #include "mozilla/CheckedInt.h"
     23 #include "mozilla/MacroForEach.h"
     24 #include "mozilla/Maybe.h"
     25 #include "mozilla/Result.h"
     26 
     27 #include <cstdint>
     28 #include <cstring>
     29 #include <type_traits>
     30 
     31 namespace js {
     32 namespace wasm {
     33 
     34 class TypeContext;
     35 
     36 // [SMDOC] "Module serialization"
     37 //
     38 // A wasm::Module may be serialized to a binary format that allows for quick
     39 // reloads of a previously compiled wasm binary.
     40 //
     41 // The binary format is optimized for encoding/decoding speed, not size. There
     42 // is no formal specification, and no backwards/forwards compatibility
     43 // guarantees. The prelude of the encoding contains a 'build ID' which must be
     44 // used when reading from a cache entry to determine if it is valid.
     45 //
     46 // Module serialization and deserialization are performed using templated
     47 // functions that allow for (imperfect) abstraction over whether we are decoding
     48 // or encoding the module. It can be viewed as a specialization of the visitor
     49 // pattern.
     50 //
     51 // Each module data structure is visited by a function parameterized by the
     52 // "mode", which may be either:
     53 //  1. MODE_SIZE - We are computing the final encoding size, before encoding it
     54 //  2. MODE_ENCODE - We are actually encoding the module to bytes
     55 //  3. MODE_DECODE - We are decoding the module from bytes
     56 //
     57 // These functions are called "coding" functions, as they are generic to whether
     58 // we are "encoding" or "decoding". Their names use the verb "Code" as a
     59 // prefix.
     60 //
     61 // Each coding function takes the item being visited, along with a "Coder"
     62 // which contains the state needed for each mode. This is either a buffer span
     63 // or an accumulated length. The coding function either manipulates the Coder
     64 // directly or delegates to its field's coding functions.
     65 //
     66 // Leaf data types are usually just copied directly to and from memory using a
     67 // generic "CodePod" function. See the "cacheable POD" documentation in this
     68 // file for more information.
     69 //
     70 // Non-leaf data types need an explicit coding function. This function can
     71 // usually be completely generic to decoding/encoding, and delegate to the
     72 // coding functions for each field. Separate decoding/encoding functions may
     73 // be needed when decoding requires initialization logic, such as constructors.
     74 // In this case, it is critical that both functions agree on the fields to be
     75 // coded, and the order they are coded in.
     76 //
     77 // Coding functions are defined as free functions in "WasmSerialize.cpp". When
     78 // they require access to protected state in a type, they may use the
     79 // WASM_DECLARE_FRIEND_SERIALIZE macro.
     80 
// Signals an out of memory condition. This is an empty tag type: it carries
// no data and only serves as the error alternative of a result.
struct OutOfMemory {};
     83 
// The result of serialization: either success (mozilla::Ok) or an
// out-of-memory failure (OutOfMemory).
using CoderResult = mozilla::Result<mozilla::Ok, OutOfMemory>;
     86 
     87 // CoderMode parameterizes the coding functions
     88 enum CoderMode {
     89  // We are computing the final size of the encoded buffer. This is a discrete
     90  // pass that runs before encoding.
     91  MODE_SIZE,
     92  // We are encoding the module to bytes.
     93  MODE_ENCODE,
     94  // We are decoding the module from bytes.
     95  MODE_DECODE,
     96 };
     97 
     98 // Coding functions take a different argument depending on which CoderMode
     99 // they are invoked with:
    100 //   * MODE_SIZE - const T*
    101 //   * MODE_ENCODE - const T*
    102 //   * MODE_DECODE - T*
    103 //
    104 // The CoderArg<mode, T> type alias is used to acquire the proper type for
    105 // coding function arguments.
    106 template <CoderMode mode, typename V>
    107 struct CoderArgT;
    108 
    109 template <typename V>
    110 struct CoderArgT<MODE_SIZE, V> {
    111  using T = const V*;
    112 };
    113 
    114 template <typename V>
    115 struct CoderArgT<MODE_DECODE, V> {
    116  using T = V*;
    117 };
    118 
    119 template <typename V>
    120 struct CoderArgT<MODE_ENCODE, V> {
    121  using T = const V*;
    122 };
    123 
    124 template <CoderMode mode, typename T>
    125 using CoderArg = typename CoderArgT<mode, T>::T;
    126 
// Coder is the state provided to all coding functions during module traversal.
// The primary template is only declared here; each CoderMode has its own
// explicit specialization with the state that mode needs.
template <CoderMode mode>
struct Coder;
    130 
    131 // A Coder<MODE_SIZE> computes the total encoded size of a module
    132 template <>
    133 struct Coder<MODE_SIZE> {
    134  explicit Coder(const TypeContext* types) : types_(types), size_(0) {}
    135 
    136  // The types of the module that we're going to encode. This is required in
    137  // order to encode the original index of types that we encounter.
    138  const TypeContext* types_;
    139 
    140  // The current size of buffer required to serialize this module.
    141  mozilla::CheckedInt<size_t> size_;
    142 
    143  // This function shares a signature with MODE_ENCODE to allow functions to be
    144  // generic across MODE_SIZE/MODE_ENCODE, even though the src pointer is not
    145  // needed for MODE_SIZE.
    146  CoderResult writeBytes(const void* unusedSrc, size_t length);
    147 };
    148 
    149 // A Coder<MODE_ENCODE> holds the buffer being written to
    150 template <>
    151 struct Coder<MODE_ENCODE> {
    152  Coder(const TypeContext* types, uint8_t* start, size_t length)
    153      : types_(types), buffer_(start), end_(start + length) {}
    154 
    155  // The types of the module that we're encoding. This is required in
    156  // order to encode the original index of types that we encounter.
    157  const TypeContext* types_;
    158 
    159  // The current position in the buffer we're writing to.
    160  uint8_t* buffer_;
    161  // The end position in the buffer we're writing to.
    162  const uint8_t* end_;
    163 
    164  CoderResult writeBytes(const void* src, size_t length);
    165 };
    166 
    167 // A Coder<MODE_DECODE> holds the buffer being read from
    168 template <>
    169 struct Coder<MODE_DECODE> {
    170  Coder(const uint8_t* start, size_t length)
    171      : types_(nullptr), buffer_(start), end_(start + length) {}
    172 
    173  // The types of the module that we're decoding. This is null until the types
    174  // of this module are decoded.
    175  const TypeContext* types_;
    176 
    177  // The current position in the buffer we're reading from.
    178  const uint8_t* buffer_;
    179  // The end position in the buffer we're reading from.
    180  const uint8_t* end_;
    181 
    182  CoderResult readBytes(void* dest, size_t length);
    183  CoderResult readBytesRef(size_t length, const uint8_t** bytesBegin);
    184 };
    185 
// Macros to help types declare friendship with a coding function

// Declares the function template named Code<TYPE> (token-pasted, e.g. CodeFoo
// for TYPE = Foo) with parameters (Coder<mode>&, CoderArg<mode, TYPE>) as a
// friend, for every CoderMode. Intended to be placed inside the definition of
// TYPE. The expansion already ends with a semicolon.
#define WASM_DECLARE_FRIEND_SERIALIZE(TYPE) \
 template <CoderMode mode>                 \
 friend CoderResult Code##TYPE(Coder<mode>&, CoderArg<mode, TYPE>);
    191 
    192 #define WASM_DECLARE_FRIEND_SERIALIZE_ARGS(TYPE, ARGS...) \
    193  template <CoderMode mode>                               \
    194  friend CoderResult Code##TYPE(Coder<mode>&, CoderArg<mode, TYPE>, ARGS);
    195 
    196 // [SMDOC] "Cacheable POD"
    197 //
    198 // Module serialization relies on copying simple structs to and from the
    199 // cache format. We need a way to ensure that we only do this on types that are
    200 // "safe". We call this "cacheable POD". Note: this is not the same thing as
    201 // "POD" as that may contain pointers, which are not cacheable.
    202 //
    203 // We define cacheable POD (C-POD) recursively upon types:
    204 //   1. any integer type is C-POD
    205 //   2. any floating point type is C-POD
    206 //   3. any enum type is C-POD
    207 //   4. any mozilla::Maybe<T> with T: C-POD is C-POD
    208 //   5. any T[N] with T: C-POD is C-POD
    209 //   6. any union where all fields are C-POD is C-POD
    210 //   7. any struct that meets the following conditions is C-POD
    211 //      * every field's type must be C-POD
    212 //      * the parent type, if it exists, must also be C-POD
    213 //      * there must be no virtual methods
    214 //
    215 // There is no combination of C++ type traits at this time that can
    216 // automatically meet these criteria, so we are rolling our own system.
    217 //
    218 // We define an "IsCacheablePod" type trait, with builtin rules for cases (1-5).
    219 // The complex cases (6-7) are handled using manual declaration and checking
    220 // macros that must be used upon structs and unions that are considered
    221 // cacheable POD.
    222 //
    223 // See the following macros for details:
    224 //   - WASM_DECLARE_CACHEABLE_POD
    225 //   - WASM_CHECK_CACHEABLE_POD[_WITH_PARENT]
    226 
    227 // The IsCacheablePod type trait primary template. Contains the rules for
    228 // (cases 1-3).
    229 template <typename T>
    230 struct IsCacheablePod
    231    : public std::conditional_t<std::is_arithmetic_v<T> || std::is_enum_v<T>,
    232                                std::true_type, std::false_type> {};
    233 
    234 // Partial specialization for (case 4).
    235 template <typename T>
    236 struct IsCacheablePod<mozilla::Maybe<T>>
    237    : public std::conditional_t<IsCacheablePod<T>::value, std::true_type,
    238                                std::false_type> {};
    239 
    240 // Partial specialization for (case 5).
    241 template <typename T, size_t N>
    242 struct IsCacheablePod<T[N]>
    243    : public std::conditional_t<IsCacheablePod<T>::value, std::true_type,
    244                                std::false_type> {};
    245 
// Variable-template shorthand for IsCacheablePod<T>::value.
template <class T>
inline constexpr bool is_cacheable_pod = IsCacheablePod<T>::value;
    248 
    249 // Checks if derrived class will not use the structure alignment for its
    250 // next field. It used when pod is a base class.
    251 #define WASM_CHECK_CACHEABLE_POD_PADDING(Type)                \
    252  class __CHECK_PADING_##Type : public Type {                 \
    253   public:                                                    \
    254    char c;                                                   \
    255  };                                                          \
    256  static_assert(sizeof(__CHECK_PADING_##Type) > sizeof(Type), \
    257                #Type " will overlap with next field if inherited");
    258 
    259 // Declare the type 'Type' to be cacheable POD. The definition of the type must
    260 // contain a WASM_CHECK_CACHEABLE_POD[_WITH_PARENT] to ensure all fields of the
    261 // type are cacheable POD.
    262 #define WASM_DECLARE_CACHEABLE_POD(Type)                                      \
    263  static_assert(!std::is_polymorphic_v<Type>,                                 \
    264                #Type "must not have virtual methods");                       \
    265  } /* namespace wasm */                                                      \
    266  } /* namespace js */                                                        \
    267  template <>                                                                 \
    268  struct js::wasm::IsCacheablePod<js::wasm::Type> : public std::true_type {}; \
    269  namespace js {                                                              \
    270  namespace wasm {
    271 
    272 // Helper: check each field's type to be cacheable POD
    273 #define WASM_CHECK_CACHEABLE_POD_FIELD_(Field)                    \
    274  static_assert(js::wasm::IsCacheablePod<decltype(Field)>::value, \
    275                #Field " must be cacheable pod");
    276 
    277 // Check every field in a type definition to ensure they are cacheable POD.
    278 #define WASM_CHECK_CACHEABLE_POD(Fields...) \
    279  MOZ_FOR_EACH(WASM_CHECK_CACHEABLE_POD_FIELD_, (), (Fields))
    280 
    281 // Check every field in a type definition to ensure they are cacheable POD, and
    282 // check that the parent class is also cacheable POD.
    283 #define WASM_CHECK_CACHEABLE_POD_WITH_PARENT(Parent, Fields...) \
    284  static_assert(js::wasm::IsCacheablePod<Parent>::value,        \
    285                #Parent " must be cacheable pod");              \
    286  MOZ_FOR_EACH(WASM_CHECK_CACHEABLE_POD_FIELD_, (), (Fields))
    287 
    288 // Allow fields that are not cacheable POD but are believed to be safe for
    289 // serialization due to some justification.
    290 #define WASM_ALLOW_NON_CACHEABLE_POD_FIELD(Field, Reason)          \
    291  static_assert(!js::wasm::IsCacheablePod<decltype(Field)>::value, \
    292                #Field " is not cacheable due to " Reason);
    293 
    294 }  // namespace wasm
    295 }  // namespace js
    296 
    297 #endif  // wasm_serialize_h