tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

WasmFrame.h (19038B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 *
      4 * Copyright 2021 Mozilla Foundation
      5 *
      6 * Licensed under the Apache License, Version 2.0 (the "License");
      7 * you may not use this file except in compliance with the License.
      8 * You may obtain a copy of the License at
      9 *
     10 *     http://www.apache.org/licenses/LICENSE-2.0
     11 *
     12 * Unless required by applicable law or agreed to in writing, software
     13 * distributed under the License is distributed on an "AS IS" BASIS,
     14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 * See the License for the specific language governing permissions and
     16 * limitations under the License.
     17 */
     18 
     19 /* [SMDOC] The WASM ABIs
     20 *
     21 * Wasm-internal ABI.
     22 *
     23 * The *Wasm-internal ABI* is the ABI a wasm function assumes when it is
     24 * entered, and the one it assumes when it is making a call to what it believes
     25 * is another wasm function.
     26 *
     27 * We pass the first function arguments in registers (GPR and FPU both) and the
     28 * rest on the stack, generally according to platform ABI conventions (which can
     29 * be hairy).  On x86-32 there are no register arguments.
     30 *
     31 * We have no callee-saves registers in the wasm-internal ABI, regardless of the
     32 * platform ABI conventions, though see below about InstanceReg or HeapReg.
     33 *
     34 * We return the last return value in the first return register, according to
     35 * platform ABI conventions.  If there is more than one return value, an area is
     36 * allocated in the caller's frame to receive the other return values, and the
     37 * address of this area is passed to the callee as the last argument.  Return
     38 * values except the last are stored in ascending order within this area.  Also
     39 * see below about alignment of this area and the values in it.
     40 *
     41 * When a function is entered, there are two incoming register values in
     42 * addition to the function's declared parameters: InstanceReg must have the
     43 * correct instance pointer, and HeapReg the correct memoryBase, for the
     44 * function.  (On x86-32 there is no HeapReg.)  From the instance we can get to
     45 * the JSContext, the MemoryBase, and many other things.  Each instance
     46 * pointer corresponds to exactly one instance.
     47 *
     48 * HeapReg and InstanceReg are not parameters in the usual sense, nor are they
     49 * callee-saves registers.  Instead they constitute global register state, the
     50 * purpose of which is to bias the call ABI in favor of intra-instance calls,
     51 * the predominant case where the caller and the callee have the same
     52 * InstanceReg and HeapReg values.
     53 *
     54 * With this global register state, literally no work needs to take place to
     55 * save and restore the instance and MemoryBase values across intra-instance
     56 * call boundaries.
     57 *
     58 * For inter-instance calls, in contrast, there must be an instance switch at
     59 * the call boundary: Before the call, the callee's instance must be loaded
     60 * (from a closure or from the import table), and from the instance we load the
     61 * callee's MemoryBase, the realm, and the JSContext.  The caller's and callee's
     62 * instance values must be stored into the frame (to aid unwinding), the
     63 * callee's realm must be stored into the JSContext, and the callee's instance
     64 * and MemoryBase values must be moved to appropriate registers.  After the
     65 * call, the caller's instance must be loaded, and from it the caller's
     66 * MemoryBase and realm, and the JSContext.  The realm must be stored into the
     67 * JSContext and the caller's instance and MemoryBase values must be moved to
     68 * appropriate registers.
     69 *
     70 * Direct calls to functions within the same module are always intra-instance,
     71 * while direct calls to imported functions are always inter-instance.  Indirect
     72 * calls -- call_indirect in the MVP, future call_ref and call_funcref -- may or
     73 * may not be intra-instance.
     74 *
     75 * call_indirect, and future call_funcref, also pass a signature value in a
     76 * register (even on x86-32); this is a small integer or a pointer value
     77 * denoting the caller's expected function signature.  The callee must compare
     78 * it to the value or pointer that denotes its actual signature, and trap on
     79 * mismatch.
     80 *
     81 * This is what the stack looks like during a call, after the callee has
     82 * completed the prologue:
     83 *
     84 *     |                                   |
     85 *     +-----------------------------------+ <-+
     86 *     |               ...                 |   |
     87 *     |      Caller's private frame       |   |
     88 *     +-----------------------------------+   |
     89 *     |   Multi-value return (optional)   |   |
     90 *     |               ...                 |   |
     91 *     +-----------------------------------+   |
     92 *     |       Stack args (optional)       |   |
     93 *     |               ...                 |   |
     94 *     +-----------------------------------+ -+|
     95 *     |          Caller instance slot     |   \
     96 *     |          Callee instance slot     |   | \
     97 *     +-----------------------------------+   |  \
     98 *     |       Shadowstack area (Win64)    |   |  wasm::FrameWithInstances
     99 *     |            (32 bytes)             |   |  /
    100 *     +-----------------------------------+   | /  <= SP "Before call"
    101 *     |          Return address           |   //   <= SP "After call"
    102 *     |             Saved FP          ----|--+/
    103 *     +-----------------------------------+ -+ <=  FP (a wasm::Frame*)
    104 *     |  DebugFrame, Locals, spills, etc  |
    105 *     |   (i.e., callee's private frame)  |
    106 *     |             ....                  |
    107 *     +-----------------------------------+    <=  SP
    108 *
    109 * The FrameWithInstances is a struct with four fields: the saved FP, the return
    110 * address, and the two instance slots; the shadow stack area is there only on
    111 * Win64 and is unused by wasm but is part of the native ABI, with which the
    112 * wasm ABI is mostly compatible.  The slots for caller and callee instance are
    113 * only populated by the instance switching code in inter-instance calls so that
    114 * stack unwinding can keep track of the correct instance value for each frame,
    115 * the instance not being obtainable from anywhere else.  Nothing in the frame
    116 * itself indicates directly whether the instance slots are valid - for that,
    117 * the return address must be used to look up a CallSite structure that carries
    118 * that information.
    119 *
    120 * (In Ion builds with full debug checks, we also write the callee instance to
    121 * its usual slot regardless of the previous logic. This allows us to recover
    122 * the current function's instance pointer at any time for debug checks. If it's
    123 * an inter-instance call, this work is merely redundant.)
    124 *
    125 * The stack area above the return address is owned by the caller, which may
    126 * deallocate the area on return or choose to reuse it for subsequent calls.
    127 * (The baseline compiler allocates and frees the stack args area and the
    128 * multi-value result area per call.  Ion reuses the areas and allocates them as
    129 * part of the overall activation frame when the procedure is entered; indeed,
    130 * the multi-value return area can be anywhere within the caller's private
    131 * frame, not necessarily directly above the stack args.)
    132 *
    133 * If the stack args area contains references, it is up to the callee's stack map
    134 * to name the locations where those references exist, and the caller's stack
    135 * map must not (redundantly) name those locations.  (The callee's ownership of
    136 * this area will be crucial for making tail calls work, as the types of the
    137 * locations can change if the callee makes a tail call.)  If pointer values are
    138 * spilled by anyone into the Shadowstack area they will not be traced.
    139 *
    140 * References in the multi-return area are covered by the caller's map, as these
    141 * slots outlive the call.
    142 *
    143 * The address "Before call", ie the part of the FrameWithInstances above the
    144 * Frame, must be aligned to WasmStackAlignment, and everything follows from
    145 * that, with padding inserted for alignment as required for stack arguments. In
    146 * turn WasmStackAlignment is at least as large as the largest parameter type.
    147 *
    148 * The address of the multiple-results area is currently 8-byte aligned by Ion
    149 * and its alignment in baseline is uncertain, see bug 1747787.  Result values
    150 * are stored packed within the area in fields whose size is given by
    151 * ResultStackSize(ValType); this breaks alignment too.  This all seems
    152 * underdeveloped.
    153 *
    154 * In the wasm-internal ABI, the ARM64 PseudoStackPointer (PSP) is garbage on
    155 * entry but must be synced with the real SP at the point the function returns.
    156 *
    157 *
    158 * The Wasm Builtin ABIs.
    159 *
    160 * Also see `[SMDOC] Process-wide builtin thunk set` in WasmBuiltins.cpp.
    161 *
    162 * The *Wasm-builtin ABIs* comprise the ABIs used when wasm makes calls directly
    163 * to the C++ runtime (but not to the JS interpreter), including instance
    164 * methods, helpers for operations such as 64-bit division on 32-bit systems,
    165 * allocation and write barriers, conversions to/from JS values, special
    166 * fast-path JS imports, and trap handling.
    167 *
    168 * The callee of a builtin call will always assume the C/C++ ABI.  Therefore
    169 * every volatile (caller-saves) register that wasm uses must be saved across
    170 * the call, the stack must be aligned as for a C/C++-ABI call before the call,
    171 * and any ABI registers the callee expects to have specific values must be set
    172 * up (eg the frame pointer, if the C/C++ ABI assumes it is set).
    173 *
    174 * Most builtin calls are straightforward: the wasm caller knows that it is
    175 * performing a call, and so it saves live registers, moves arguments into ABI
    176 * locations, etc, before calling.  Abstractions in the masm make sure to pass
    177 * the instance pointer to an instance "method" call and to restore the
    178 * InstanceReg and HeapReg after the call.  In these straightforward cases,
    179 * calling the builtin additionally amounts to:
    180 *
    181 *  - exiting the wasm activation
    182 *  - adjusting parameter values to account for platform weirdness (FP arguments
    183 *    are handled differently in the C/C++ ABIs on ARM and x86-32 than in the
    184 *    Wasm ABI)
    185 *  - copying stack arguments into place for the C/C++ ABIs
    186 *  - making the call
    187 *  - adjusting the return values on return
    188 *  - re-entering the wasm activation and returning to the wasm caller
    189 *
    190 * The steps above are performed by the *builtin thunk* for the builtin and the
    191 * builtin itself is said to be *thunked*.  Going via the thunk is simple and,
    192 * except for always having to copy stack arguments on x86-32 and the extra call
    193 * in the thunk, close to as fast as we can make it without heroics.  Except for
    194 * the arithmetic helpers on 32-bit systems, most builtins are rarely used, are
    195 * asm.js-specific, or are expensive anyway, and the overhead of the extra call
    196 * doesn't matter.
    197 *
    198 * A few builtins for special purposes are *unthunked* and fall into two
    199 * classes: they would normally be thunked but are used in circumstances where
    200 * the VM is in an unusual state; or they do their work within the activation.
    201 *
    202 * In the former class, we find the debug trap handler, which must preserve all
    203 * live registers because it is called in contexts where live registers have not
    204 * been saved; argument coercion functions, which are called while a call frame
    205 * is being built for a JS->Wasm or Wasm->JS call; and other routines that have
    206 * special needs for constructing the call.  These all exit the activation, but
    207 * handle the exit specially.
    208 *
    209 * In the latter class, we find two functions that abandon the VM state and
    210 * unwind the activation, HandleThrow and HandleTrap; and some debug print
    211 * functions that do not affect the VM state at all.
    212 *
    213 * To summarize, when wasm calls a builtin thunk the stack will end up looking
    214 * like this from within the C++ code:
    215 *
    216 *      |                         |
    217 *      +-------------------------+
    218 *      |        Wasm frame       |
    219 *      +-------------------------+
    220 *      |    Thunk frame (exit)   |
    221 *      +-------------------------+
    222 *      |   Builtin frame (C++)   |
    223 *      +-------------------------+  <= SP
    224 *
    225 * There is an assumption in the profiler (in initFromExitFP) that an exit has
    226 * left precisely one frame on the stack for the thunk itself.  There may be
    227 * additional assumptions elsewhere, not yet found.
    228 *
    229 * Very occasionally, Wasm will call C++ without going through the builtin
    230 * thunks, and this can be a source of problems.  The one case I know about
    231 * right now is that the JS pre-barrier filtering code is called directly from
    232 * Wasm, see bug 1464157.
    233 *
    234 *
    235 * Wasm stub ABIs.
    236 *
    237 * Also see `[SMDOC] Exported wasm functions and the jit-entry stubs` in
    238 * WasmJS.cpp.
    239 *
    240 * The "stub ABIs" are not properly speaking ABIs themselves, but ABI
    241 * converters.  An "entry" stub calls in to wasm and an "exit" stub calls out
    242 * from wasm.  The entry stubs must convert from whatever data formats the
    243 * caller has to wasm formats (and in the future must provide some kind of type
    244 * checking for pointer types); the exit stubs convert from wasm formats to the
    245 * callee's expected format.
    246 *
    247 * There are different entry paths from the JS interpreter (using the C++ ABI
    248 * and data formats) and from jitted JS code (using the JIT ABI and data
    249 * formats); indeed there is a "normal" JitEntry path ("JitEntry") that will
    250 * perform argument and return value conversion, and the "fast" JitEntry path
    251 * ("DirectCallFromJit") that is only used when it is known that the JIT will
    252 * only pass and receive wasm-compatible data and no conversion is needed.
    253 *
    254 * Similarly, there are different exit paths to the interpreter (using the C++
    255 * ABI and data formats) and to JS JIT code (using the JIT ABI and data
    256 * formats).  Also, builtin calls described above are themselves a type of exit,
    257 * and builtin thunks are properly a type of exit stub.
    258 *
    259 * Data conversions are difficult because the VM is in an intermediate state
    260 * when they happen, we want them to be fast when possible, and some conversions
    261 * can re-enter both JS code and wasm code.
    262 */
    263 
    264 #ifndef wasm_frame_h
    265 #define wasm_frame_h
    266 
    267 #include "mozilla/Assertions.h"
    268 
    269 #include <stddef.h>
    270 #include <stdint.h>
    271 #include <type_traits>
    272 
    273 #include "jit/Registers.h"  // For js::jit::ShadowStackSpace
    274 
    275 namespace js {
    276 namespace wasm {
    277 
    278 class Instance;
    279 
    280 // Bit tag set when exiting wasm code in JitActivation's exitFP.
        // The tag is the lowest bit of the pointer value.  Frame::isExitFP,
        // Frame::untagExitFP and Frame::addExitFPTag below respectively test,
        // clear and set this bit.
    281 constexpr uintptr_t ExitFPTag = 0x1;
    282 
    283 // wasm::Frame represents the bytes pushed by the call instruction and the
    284 // fixed prologue generated by wasm::GenerateCallablePrologue.
    285 //
    286 // Across all architectures it is assumed that, before the call instruction, the
    287 // stack pointer is WasmStackAlignment-aligned. Thus after the prologue, and
    288 // before the function has made its stack reservation, the stack alignment is
    289 // sizeof(Frame) % WasmStackAlignment.
    290 //
    291 // During MacroAssembler code generation, the bytes pushed after the wasm::Frame
    292 // are counted by masm.framePushed. Thus, the stack alignment at any point in
    293 // time is (sizeof(wasm::Frame) + masm.framePushed) % WasmStackAlignment.
    294 
    295 class Frame {
    296  // See GenerateCallableEpilogue for why this must be
    297  // the first field of wasm::Frame (in a downward-growing stack).
    298  // It's either the caller's Frame*, for wasm callers, or the JIT caller frame
    299  // plus a tag otherwise.
    300  uint8_t* callerFP_;
    301 
    302  // The return address pushed by the call (in the case of ARM/MIPS the return
    303  // address is pushed by the first instruction of the prologue).
    304  void* returnAddress_;
    305 
    306 public:
         // Byte offset of the callerFP_ field within Frame.
    307  static constexpr uint32_t callerFPOffset() {
    308    return offsetof(Frame, callerFP_);
    309  }
         // Byte offset of the returnAddress_ field within Frame.
    310  static constexpr uint32_t returnAddressOffset() {
    311    return offsetof(Frame, returnAddress_);
    312  }
    313 
         // The stored return address, viewed as a byte pointer.
    314  uint8_t* returnAddress() const {
    315    return reinterpret_cast<uint8_t*>(returnAddress_);
    316  }
    317 
         // Pointer to the return-address slot itself (not the value stored in it).
    318  void** addressOfReturnAddress() {
    319    return reinterpret_cast<void**>(&returnAddress_);
    320  }
    321 
         // The caller FP exactly as stored; no untagging is performed.
    322  uint8_t* rawCaller() const { return callerFP_; }
    323 
         // The stored caller FP reinterpreted as a Frame*.  Per the comment on
         // callerFP_, this is only a real Frame* when the caller is wasm.
    324  Frame* wasmCaller() const { return reinterpret_cast<Frame*>(callerFP_); }
    325 
         // Returns the same stored value as rawCaller().
         // NOTE(review): presumably meant for the case where the caller is a JIT
         // frame -- confirm against callers.
    326  uint8_t* jitEntryCaller() const { return callerFP_; }
    327 
         // Casts an FP value to a const Frame*, asserting first that the ExitFPTag
         // bit is not set on it.
    328  static const Frame* fromUntaggedWasmExitFP(const void* savedFP) {
    329    MOZ_ASSERT(!isExitFP(savedFP));
    330    return reinterpret_cast<const Frame*>(savedFP);
    331  }
    332 
         // True if the ExitFPTag bit is set in the given pointer value.
    333  static bool isExitFP(const void* fp) {
    334    return reinterpret_cast<uintptr_t>(fp) & ExitFPTag;
    335  }
    336 
         // Clears the ExitFPTag bit; asserts that the input carries the tag.
    337  static uint8_t* untagExitFP(const void* fp) {
    338    MOZ_ASSERT(isExitFP(fp));
    339    return reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(fp) &
    340                                      ~ExitFPTag);
    341  }
    342 
         // Sets the ExitFPTag bit; asserts that the input is not already tagged.
    343  static uint8_t* addExitFPTag(const Frame* fp) {
    344    MOZ_ASSERT(!isExitFP(fp));
    345    return reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(fp) |
    346                                      ExitFPTag);
    347  }
    348 };
    349 
        // Compile-time layout checks: Frame must have no virtual functions and must
        // be exactly two pointers in size (callerFP_ followed by returnAddress_).
    350 static_assert(!std::is_polymorphic_v<Frame>, "Frame doesn't need a vtable.");
    351 static_assert(sizeof(Frame) == 2 * sizeof(void*),
    352              "Frame is a two pointer structure");
    353 
    354 class FrameWithShadowStackSpace : public Frame {
         // A Frame followed by a byte array of js::jit::ShadowStackSpace bytes.
         // FrameWithInstances selects this class as its base only when
         // js::jit::ShadowStackSpace >= 1; otherwise it derives from Frame directly.
    355 protected:
    356  // `ShadowStackSpace` bytes will be allocated here on Win64, at higher
    357  // addresses than Frame and at lower addresses than the instance fields.
    358  uint8_t shadowStackSpace_[js::jit::ShadowStackSpace];
    359 };
    360 
    361 class FrameWithInstances
    362    : public std::conditional_t<js::jit::ShadowStackSpace >= 1,
    363                                FrameWithShadowStackSpace, Frame> {
         // The base is FrameWithShadowStackSpace when js::jit::ShadowStackSpace is
         // non-zero and plain Frame otherwise.  The static_asserts after the class
         // check that the instance slots sit directly after the Frame fields plus
         // the shadow stack bytes.
    364  // The instance area MUST be two pointers exactly.
         // Declaration order matters: the callee slot comes first (lower offset),
         // the caller slot second; this is also checked by the static_asserts.
    365  Instance* calleeInstance_;
    366  Instance* callerInstance_;
    367 
    368 public:
         // Values currently stored in the two instance slots.  As described in the
         // SMDOC comment at the top of this file, the slots are only populated for
         // inter-instance calls, so the values are not always valid.
    369  Instance* calleeInstance() { return calleeInstance_; }
    370  Instance* callerInstance() { return callerInstance_; }
    371 
         // Stores `instance` into the callee slot and returns the stored value.
    372  Instance* setCalleeInstance(Instance* instance) {
    373    return calleeInstance_ = instance;
    374  }
    375 
         // Size in bytes of the two instance slots alone: everything
         // FrameWithInstances adds on top of Frame, minus the shadow stack bytes.
    376  constexpr static uint32_t sizeOfInstanceFields() {
    377    return sizeof(wasm::FrameWithInstances) - sizeof(wasm::Frame) -
    378           js::jit::ShadowStackSpace;
    379  }
    380 
         // Size in bytes of everything FrameWithInstances adds on top of Frame
         // (shadow stack bytes, if any, plus the instance slots).
    381  constexpr static uint32_t sizeOfInstanceFieldsAndShadowStack() {
    382    return sizeof(wasm::FrameWithInstances) - sizeof(wasm::Frame);
    383  }
    384 
         // Byte offset of the callee instance slot from the start of
         // FrameWithInstances.
    385  constexpr static uint32_t calleeInstanceOffset() {
    386    return offsetof(FrameWithInstances, calleeInstance_);
    387  }
    388 
         // Same offset, measured from the end of the Frame part rather than from
         // the start of the object.
    389  constexpr static uint32_t calleeInstanceOffsetWithoutFrame() {
    390    return calleeInstanceOffset() - sizeof(wasm::Frame);
    391  }
    392 
         // Byte offset of the caller instance slot from the start of
         // FrameWithInstances.
    393  constexpr static uint32_t callerInstanceOffset() {
    394    return offsetof(FrameWithInstances, callerInstance_);
    395  }
    396 
         // Same offset, measured from the end of the Frame part rather than from
         // the start of the object.
    397  constexpr static uint32_t callerInstanceOffsetWithoutFrame() {
    398    return callerInstanceOffset() - sizeof(wasm::Frame);
    399  }
    400 };
    401 
        // Layout checks: measured from the end of Frame, the callee slot sits right
        // after the shadow stack bytes (offset ShadowStackSpace), the caller slot is
        // one pointer further, and the two slots together are two pointers in size.
    402 static_assert(FrameWithInstances::calleeInstanceOffsetWithoutFrame() ==
    403                  js::jit::ShadowStackSpace,
    404              "Callee instance stored right above the return address.");
    405 static_assert(FrameWithInstances::callerInstanceOffsetWithoutFrame() ==
    406                  js::jit::ShadowStackSpace + sizeof(void*),
    407              "Caller instance stored right above the callee instance.");
    408 
    409 static_assert(FrameWithInstances::sizeOfInstanceFields() == 2 * sizeof(void*),
    410              "There are only two additional slots");
    411 
    412 #if defined(JS_CODEGEN_ARM64)
        // On ARM64 builds, sizeof(Frame) must be a multiple of 16 bytes.
        // NOTE(review): presumably this mirrors the 16-byte stack pointer alignment
        // that ARM64 requires -- confirm.
    413 static_assert(sizeof(Frame) % 16 == 0, "frame is aligned");
    414 #endif
    415 
    416 }  // namespace wasm
    417 }  // namespace js
    418 
    419 #endif  // wasm_frame_h