WasmSerialize.h (11812B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * 4 * Copyright 2022 Mozilla Foundation 5 * 6 * Licensed under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 #ifndef wasm_serialize_h 20 #define wasm_serialize_h 21 22 #include "mozilla/CheckedInt.h" 23 #include "mozilla/MacroForEach.h" 24 #include "mozilla/Maybe.h" 25 #include "mozilla/Result.h" 26 27 #include <cstdint> 28 #include <cstring> 29 #include <type_traits> 30 31 namespace js { 32 namespace wasm { 33 34 class TypeContext; 35 36 // [SMDOC] "Module serialization" 37 // 38 // A wasm::Module may be serialized to a binary format that allows for quick 39 // reloads of a previous compiled wasm binary. 40 // 41 // The binary format is optimized for encoding/decoding speed, not size. There 42 // is no formal specification, and no backwards/forwards compatibility 43 // guarantees. The prelude of the encoding contains a 'build ID' which must be 44 // used when reading from a cache entry to determine if it is valid. 45 // 46 // Module serialization and deserialization are performed using templated 47 // functions that allow for (imperfect) abstraction over whether we are decoding 48 // or encoding the module. It can be viewed as a specialization of the visitor 49 // pattern. 50 // 51 // Each module data structure is visited by a function parameterized by the 52 // "mode", which may be either: 53 // 1. MODE_SIZE - We are computing the final encoding size, before encoding it 54 // 2. MODE_ENCODE - We are actually encoding the module to bytes 55 // 3. MODE_DECODE - We are decoding the module from bytes 56 // 57 // These functions are called "coding" functions, as they are generic to whether 58 // we are "encoding" or "decoding". The verb tense "code" is used for the 59 // prefix. 60 // 61 // Each coding function takes the item being visited, along with a "Coder" 62 // which contains the state needed for each mode. This is either a buffer span 63 // or an accumulated length. The coding function either manipulates the Coder 64 // directly or delegates to its field's coding functions. 65 // 66 // Leaf data types are usually just copied directly to and from memory using a 67 // generic "CodePod" function. See the "cacheable POD" documentation in this 68 // file for more information. 69 // 70 // Non-leaf data types need an explicit coding function. This function can 71 // usually be completely generic to decoding/encoding, and delegate to the 72 // coding functions for each field. Separate decoding/encoding functions may 73 // be needed when decoding requires initialization logic, such as constructors. 74 // In this case, it is critical that both functions agree on the fields to be 75 // coded, and the order they are coded in. 76 // 77 // Coding functions are defined as free functions in "WasmSerialize.cpp". When 78 // they require access to protected state in a type, they may use the 79 // WASM_DECLARE_FRIEND_SERIALIZE macro. 80 81 // Signal an out of memory condition 82 struct OutOfMemory {}; 83 84 // The result of serialization, either OK or OOM 85 using CoderResult = mozilla::Result<mozilla::Ok, OutOfMemory>; 86 87 // CoderMode parameterizes the coding functions 88 enum CoderMode { 89 // We are computing the final size of the encoded buffer. This is a discrete 90 // pass that runs before encoding. 91 MODE_SIZE, 92 // We are encoding the module to bytes. 93 MODE_ENCODE, 94 // We are decoding the module from bytes. 95 MODE_DECODE, 96 }; 97 98 // Coding functions take a different argument depending on which CoderMode 99 // they are invoked with: 100 // * MODE_SIZE - const T* 101 // * MODE_ENCODE - const T* 102 // * MODE_DECODE - T* 103 // 104 // The CoderArg<mode, T> type alias is used to acquire the proper type for 105 // coding function arguments. 106 template <CoderMode mode, typename V> 107 struct CoderArgT; 108 109 template <typename V> 110 struct CoderArgT<MODE_SIZE, V> { 111 using T = const V*; 112 }; 113 114 template <typename V> 115 struct CoderArgT<MODE_DECODE, V> { 116 using T = V*; 117 }; 118 119 template <typename V> 120 struct CoderArgT<MODE_ENCODE, V> { 121 using T = const V*; 122 }; 123 124 template <CoderMode mode, typename T> 125 using CoderArg = typename CoderArgT<mode, T>::T; 126 127 // Coder is the state provided to all coding functions during module traversal. 128 template <CoderMode mode> 129 struct Coder; 130 131 // A Coder<MODE_SIZE> computes the total encoded size of a module 132 template <> 133 struct Coder<MODE_SIZE> { 134 explicit Coder(const TypeContext* types) : types_(types), size_(0) {} 135 136 // The types of the module that we're going to encode. This is required in 137 // order to encode the original index of types that we encounter. 138 const TypeContext* types_; 139 140 // The current size of buffer required to serialize this module. 141 mozilla::CheckedInt<size_t> size_; 142 143 // This function shares a signature with MODE_ENCODE to allow functions to be 144 // generic across MODE_SIZE/MODE_ENCODE, even though the src pointer is not 145 // needed for MODE_SIZE. 146 CoderResult writeBytes(const void* unusedSrc, size_t length); 147 }; 148 149 // A Coder<MODE_ENCODE> holds the buffer being written to 150 template <> 151 struct Coder<MODE_ENCODE> { 152 Coder(const TypeContext* types, uint8_t* start, size_t length) 153 : types_(types), buffer_(start), end_(start + length) {} 154 155 // The types of the module that we're encoding. This is required in 156 // order to encode the original index of types that we encounter. 157 const TypeContext* types_; 158 159 // The current position in the buffer we're writing to. 160 uint8_t* buffer_; 161 // The end position in the buffer we're writing to. 162 const uint8_t* end_; 163 164 CoderResult writeBytes(const void* src, size_t length); 165 }; 166 167 // A Coder<MODE_DECODE> holds the buffer being read from 168 template <> 169 struct Coder<MODE_DECODE> { 170 Coder(const uint8_t* start, size_t length) 171 : types_(nullptr), buffer_(start), end_(start + length) {} 172 173 // The types of the module that we're decoding. This is null until the types 174 // of this module are decoded. 175 const TypeContext* types_; 176 177 // The current position in the buffer we're reading from. 178 const uint8_t* buffer_; 179 // The end position in the buffer we're reading from. 180 const uint8_t* end_; 181 182 CoderResult readBytes(void* dest, size_t length); 183 CoderResult readBytesRef(size_t length, const uint8_t** bytesBegin); 184 }; 185 186 // Macros to help types declare friendship with a coding function 187 188 #define WASM_DECLARE_FRIEND_SERIALIZE(TYPE) \ 189 template <CoderMode mode> \ 190 friend CoderResult Code##TYPE(Coder<mode>&, CoderArg<mode, TYPE>); 191 192 #define WASM_DECLARE_FRIEND_SERIALIZE_ARGS(TYPE, ARGS...) \ 193 template <CoderMode mode> \ 194 friend CoderResult Code##TYPE(Coder<mode>&, CoderArg<mode, TYPE>, ARGS); 195 196 // [SMDOC] "Cacheable POD" 197 // 198 // Module serialization relies on copying simple structs to and from the 199 // cache format. We need a way to ensure that we only do this on types that are 200 // "safe". We call this "cacheable POD". Note: this is not the same thing as 201 // "POD" as that may contain pointers, which are not cacheable. 202 // 203 // We define cacheable POD (C-POD) recursively upon types: 204 // 1. any integer type is C-POD 205 // 2. any floating point type is C-POD 206 // 3. any enum type is C-POD 207 // 4. any mozilla::Maybe<T> with T: C-POD is C-POD 208 // 5. any T[N] with T: C-POD is C-POD 209 // 6. any union where all fields are C-POD is C-POD 210 // 7. any struct with the following conditions must is C-POD 211 // * every field's type must be C-POD 212 // * the parent type, if it exists, must also be C-POD 213 // * there must be no virtual methods 214 // 215 // There are no combination of C++ type traits at this time that can 216 // automatically meet these criteria, so we are rolling our own system. 217 // 218 // We define a "IsCacheablePod" type trait, with builtin rules for cases (1-5). 219 // The complex cases (6-7) are handled using manual declaration and checking 220 // macros that must be used upon structs and unions that are considered 221 // cacheable POD. 222 // 223 // See the following macros for details: 224 // - WASM_DECLARE_CACHEABLE_POD 225 // - WASM_CHECK_CACHEABLE_POD[_WITH_PARENT] 226 227 // The IsCacheablePod type trait primary template. Contains the rules for 228 // (cases 1-3). 229 template <typename T> 230 struct IsCacheablePod 231 : public std::conditional_t<std::is_arithmetic_v<T> || std::is_enum_v<T>, 232 std::true_type, std::false_type> {}; 233 234 // Partial specialization for (case 4). 235 template <typename T> 236 struct IsCacheablePod<mozilla::Maybe<T>> 237 : public std::conditional_t<IsCacheablePod<T>::value, std::true_type, 238 std::false_type> {}; 239 240 // Partial specialization for (case 5). 241 template <typename T, size_t N> 242 struct IsCacheablePod<T[N]> 243 : public std::conditional_t<IsCacheablePod<T>::value, std::true_type, 244 std::false_type> {}; 245 246 template <class T> 247 inline constexpr bool is_cacheable_pod = IsCacheablePod<T>::value; 248 249 // Checks if derrived class will not use the structure alignment for its 250 // next field. It used when pod is a base class. 251 #define WASM_CHECK_CACHEABLE_POD_PADDING(Type) \ 252 class __CHECK_PADING_##Type : public Type { \ 253 public: \ 254 char c; \ 255 }; \ 256 static_assert(sizeof(__CHECK_PADING_##Type) > sizeof(Type), \ 257 #Type " will overlap with next field if inherited"); 258 259 // Declare the type 'Type' to be cacheable POD. The definition of the type must 260 // contain a WASM_CHECK_CACHEABLE_POD[_WITH_PARENT] to ensure all fields of the 261 // type are cacheable POD. 262 #define WASM_DECLARE_CACHEABLE_POD(Type) \ 263 static_assert(!std::is_polymorphic_v<Type>, \ 264 #Type "must not have virtual methods"); \ 265 } /* namespace wasm */ \ 266 } /* namespace js */ \ 267 template <> \ 268 struct js::wasm::IsCacheablePod<js::wasm::Type> : public std::true_type {}; \ 269 namespace js { \ 270 namespace wasm { 271 272 // Helper: check each field's type to be cacheable POD 273 #define WASM_CHECK_CACHEABLE_POD_FIELD_(Field) \ 274 static_assert(js::wasm::IsCacheablePod<decltype(Field)>::value, \ 275 #Field " must be cacheable pod"); 276 277 // Check every field in a type definition to ensure they are cacheable POD. 278 #define WASM_CHECK_CACHEABLE_POD(Fields...) \ 279 MOZ_FOR_EACH(WASM_CHECK_CACHEABLE_POD_FIELD_, (), (Fields)) 280 281 // Check every field in a type definition to ensure they are cacheable POD, and 282 // check that the parent class is also cacheable POD. 283 #define WASM_CHECK_CACHEABLE_POD_WITH_PARENT(Parent, Fields...) \ 284 static_assert(js::wasm::IsCacheablePod<Parent>::value, \ 285 #Parent " must be cacheable pod"); \ 286 MOZ_FOR_EACH(WASM_CHECK_CACHEABLE_POD_FIELD_, (), (Fields)) 287 288 // Allow fields that are not cacheable POD but are believed to be safe for 289 // serialization due to some justification. 290 #define WASM_ALLOW_NON_CACHEABLE_POD_FIELD(Field, Reason) \ 291 static_assert(!js::wasm::IsCacheablePod<decltype(Field)>::value, \ 292 #Field " is not cacheable due to " Reason); 293 294 } // namespace wasm 295 } // namespace js 296 297 #endif // wasm_serialize_h