tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

WasmBCFrame.h (52366B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 *
      4 * Copyright 2016 Mozilla Foundation
      5 *
      6 * Licensed under the Apache License, Version 2.0 (the "License");
      7 * you may not use this file except in compliance with the License.
      8 * You may obtain a copy of the License at
      9 *
     10 *     http://www.apache.org/licenses/LICENSE-2.0
     11 *
     12 * Unless required by applicable law or agreed to in writing, software
     13 * distributed under the License is distributed on an "AS IS" BASIS,
     14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 * See the License for the specific language governing permissions and
     16 * limitations under the License.
     17 */
     18 
     19 // This is an INTERNAL header for Wasm baseline compiler: CPU stack frame,
     20 // stack maps, and associated logic.
     21 
     22 #ifndef wasm_wasm_baseline_frame_h
     23 #define wasm_wasm_baseline_frame_h
     24 
     25 #include "wasm/WasmBaselineCompile.h"  // For BaseLocalIter
     26 #include "wasm/WasmBCDefs.h"
     27 #include "wasm/WasmBCRegDefs.h"
     28 #include "wasm/WasmBCStk.h"
     29 #include "wasm/WasmConstants.h"  // For MaxFrameSize
     30 
     31 // [SMDOC] Wasm baseline compiler's stack frame.
     32 //
     33 // For background, see "Wasm's ABIs" in WasmFrame.h; the following should never
     34 // be in conflict with that.
     35 //
     36 // The stack frame has four parts ("below" means at lower addresses):
     37 //
     38 //  - the Frame element;
     39 //  - the Local area, including the DebugFrame element and possibly a spilled
     40 //    pointer to stack results, if any; allocated below the header with various
     41 //    forms of alignment;
     42 //  - the Dynamic area, comprising the temporary storage the compiler uses for
     43 //    register spilling, allocated below the Local area;
     44 //  - the Arguments area, comprising memory allocated for outgoing calls,
     45 //    allocated below the Dynamic area.
     46 //
     47 //                +==============================+
     48 //                |    Incoming stack arg        |
     49 //                |    ...                       |
     50 // -------------  +==============================+
     51 //                |    Frame (fixed size)        |
     52 // -------------  +==============================+ <-------------------- FP
     53 //         ^      |    DebugFrame (optional)     |    ^  ^             ^^
     54 //   localSize    |    Register arg local        |    |  |             ||
     55 //         |      |    ...                       |    |  |     framePushed
     56 //         |      |    Register stack result ptr?|    |  |             ||
     57 //         |      |    Non-arg local             |    |  |             ||
     58 //         |      |    ...                       |    |  |             ||
     59 //         |      |    (padding)                 |    |  |             ||
     60 //         |      |    Instance pointer          |    |  |             ||
     61 //         |      +------------------------------+    |  |             ||
     62 //         v      |    (padding)                 |    |  v             ||
     63 // -------------  +==============================+ currentStackHeight  ||
     64 //         ^      |    Dynamic (variable size)   |    |                ||
     65 //  dynamicSize   |    ...                       |    |                ||
     66 //         v      |    ...                       |    v                ||
     67 // -------------  |    (free space, sometimes)   | ---------           v|
     68 //                +==============================+ <----- SP not-during calls
     69 //                |    Arguments (sometimes)     |                      |
     70 //                |    ...                       |                      v
     71 //                +==============================+ <----- SP during calls
     72 //
     73 // The Frame is addressed off the stack pointer.  masm.framePushed() is always
     74 // correct, and masm.getStackPointer() + masm.framePushed() always addresses the
     75 // Frame, with the DebugFrame optionally below it.
     76 //
     77 // The Local area (including the DebugFrame and, if needed, the spilled value of
     78 // the stack results area pointer) is laid out by BaseLocalIter and is allocated
     79 // and deallocated by standard prologue and epilogue functions that manipulate
     80 // the stack pointer, but it is accessed via BaseStackFrame.
     81 //
     82 // The Dynamic area is maintained by and accessed via BaseStackFrame.  On some
     83 // systems (such as ARM64), the Dynamic memory may be allocated in chunks
     84 // because the SP needs a specific alignment, and in this case there will
     85 // normally be some free space directly above the SP.  The stack height does not
     86 // include the free space, it reflects the logically used space only.
     87 //
     88 // The Dynamic area is where space for stack results is allocated when calling
     89 // functions that return results on the stack.  If a function has stack results,
     90 // a pointer to the low address of the stack result area is passed as an
     91 // additional argument, according to the usual ABI.  See
     92 // ABIResultIter::HasStackResults.
     93 //
     94 // The Arguments area is allocated and deallocated via BaseStackFrame (see
     95 // comments later) but is accessed directly off the stack pointer.
     96 
     97 namespace js {
     98 namespace wasm {
     99 
    100 using namespace js::jit;
    101 
    102 // Abstraction of the height of the stack frame, to avoid type confusion.
    103 
    104 class StackHeight {
    105  friend class BaseStackFrameAllocator;
    106 
    107  uint32_t height;
    108 
    109 public:
    110  explicit StackHeight(uint32_t h) : height(h) {}
    111  static StackHeight Invalid() { return StackHeight(UINT32_MAX); }
    112  bool isValid() const { return height != UINT32_MAX; }
    113  bool operator==(StackHeight rhs) const {
    114    MOZ_ASSERT(isValid() && rhs.isValid());
    115    return height == rhs.height;
    116  }
    117  bool operator!=(StackHeight rhs) const { return !(*this == rhs); }
    118 };
    119 
    120 // Abstraction for where multi-value results go on the machine stack.
    121 
    122 class StackResultsLoc {
    123  uint32_t bytes_;
    124  size_t count_;
    125  mozilla::Maybe<uint32_t> height_;
    126 
    127 public:
    128  StackResultsLoc() : bytes_(0), count_(0) {};
    129  StackResultsLoc(uint32_t bytes, size_t count, uint32_t height)
    130      : bytes_(bytes), count_(count), height_(mozilla::Some(height)) {
    131    MOZ_ASSERT(bytes != 0);
    132    MOZ_ASSERT(count != 0);
    133    MOZ_ASSERT(height != 0);
    134  }
    135 
    136  uint32_t bytes() const { return bytes_; }
    137  uint32_t count() const { return count_; }
    138  uint32_t height() const { return height_.value(); }
    139 
    140  bool hasStackResults() const { return bytes() != 0; }
    141  StackResults stackResults() const {
    142    return hasStackResults() ? StackResults::HasStackResults
    143                             : StackResults::NoStackResults;
    144  }
    145 };
    146 
    147 // Abstraction of the baseline compiler's stack frame (except for the Frame /
    148 // DebugFrame parts).  See comments above for more.  Remember, "below" on the
    149 // stack means at lower addresses.
    150 //
    151 // The abstraction is split into two parts: BaseStackFrameAllocator is
    152 // responsible for allocating and deallocating space on the stack and for
    153 // performing computations that are affected by how the allocation is performed;
    154 // BaseStackFrame then provides a pleasant interface for stack frame management.
    155 
    156 class BaseStackFrameAllocator {
    157  MacroAssembler& masm;
    158 
    159 #ifdef RABALDR_CHUNKY_STACK
    160  // On platforms that require the stack pointer to be aligned on a boundary
    161  // greater than the typical stack item (eg, ARM64 requires 16-byte alignment
    162  // but items are 8 bytes), allocate stack memory in chunks, and use a
    163  // separate stack height variable to track the effective stack pointer
    164  // within the allocated area.  Effectively, there's a variable amount of
    165  // free space directly above the stack pointer.  See diagram above.
    166 
    167  // The following must be true in order for the stack height to be
    168  // predictable at control flow joins:
    169  //
    170  // - The Local area is always aligned according to WasmStackAlignment, ie,
    171  //   masm.framePushed() % WasmStackAlignment is zero after allocating
    172  //   locals.
    173  //
    174  // - ChunkSize is always a multiple of WasmStackAlignment.
    175  //
    176  // - Pushing and popping are always in units of ChunkSize (hence preserving
    177  //   alignment).
    178  //
    179  // - The free space on the stack (masm.framePushed() - currentStackHeight_)
    180  //   is a predictable (nonnegative) amount.
    181 
    182  // As an optimization, we pre-allocate some space on the stack, the size of
    183  // this allocation is InitialChunk and it must be a multiple of ChunkSize.
    184  // It is allocated as part of the function prologue and deallocated as part
    185  // of the epilogue, along with the locals.
    186  //
    187  // If ChunkSize is too large then we risk overflowing the stack on simple
    188  // recursions with few live values where stack overflow should not be a
    189  // risk; if it is too small we spend too much time adjusting the stack
    190  // pointer.
    191  //
    192  // Good values for ChunkSize are the subject of future empirical analysis;
    193  // eight words is just an educated guess.
    194 
    195  static constexpr uint32_t ChunkSize = 8 * sizeof(void*);
    196  static constexpr uint32_t InitialChunk = ChunkSize;
    197 
    198  // The current logical height of the frame is
    199  //   currentStackHeight_ = localSize_ + dynamicSize
    200  // where dynamicSize is not accounted for explicitly and localSize_ also
    201  // includes size for the DebugFrame.
    202  //
    203  // The allocated size of the frame, provided by masm.framePushed(), is usually
    204  // larger than currentStackHeight_, notably at the beginning of execution when
    205  // we've allocated InitialChunk extra space.
    206 
    207  uint32_t currentStackHeight_;
    208 #endif
    209 
    210  // Size of the Local area in bytes (stable after BaseCompiler::init() has
    211  // called BaseStackFrame::setupLocals(), which in turn calls
    212  // BaseStackFrameAllocator::setLocalSize()), always rounded to the proper
    213  // stack alignment.  The Local area is then allocated in beginFunction(),
    214  // following the allocation of the Header.  See onFixedStackAllocated()
    215  // below.
    216 
    217  uint32_t localSize_;
    218 
    219 protected:
    220  ///////////////////////////////////////////////////////////////////////////
    221  //
    222  // Initialization
    223 
    224  explicit BaseStackFrameAllocator(MacroAssembler& masm)
    225      : masm(masm),
    226 #ifdef RABALDR_CHUNKY_STACK
    227        currentStackHeight_(0),
    228 #endif
    229        localSize_(UINT32_MAX) {
    230  }
    231 
    232 protected:
    233  //////////////////////////////////////////////////////////////////////
    234  //
    235  // The Local area - the static part of the frame.
    236 
    237  // Record the size of the Local area, once it is known.
    238 
    239  void setLocalSize(uint32_t localSize) {
    240    MOZ_ASSERT(localSize == AlignBytes(localSize, sizeof(void*)),
    241               "localSize_ should be aligned to at least a pointer");
    242    MOZ_ASSERT(localSize_ == UINT32_MAX);
    243    localSize_ = localSize;
    244  }
    245 
    246  // Record the current stack height, after it has become stable in
    247  // beginFunction().  See also BaseStackFrame::onFixedStackAllocated().
    248 
    249  void onFixedStackAllocated() {
    250    MOZ_ASSERT(localSize_ != UINT32_MAX);
    251 #ifdef RABALDR_CHUNKY_STACK
    252    currentStackHeight_ = localSize_;
    253 #endif
    254  }
    255 
    256 public:
    257  // The fixed amount of memory, in bytes, allocated on the stack below the
    258  // Header for purposes such as locals and other fixed values.  Includes all
    259  // necessary alignment, and on ARM64 also the initial chunk for the working
    260  // stack memory.
    261 
    262  uint32_t fixedAllocSize() const {
    263    MOZ_ASSERT(localSize_ != UINT32_MAX);
    264 #ifdef RABALDR_CHUNKY_STACK
    265    return localSize_ + InitialChunk;
    266 #else
    267    return localSize_;
    268 #endif
    269  }
    270 
    271 #ifdef RABALDR_CHUNKY_STACK
    272  // The allocated frame size is frequently larger than the logical stack
    273  // height; we round up to a chunk boundary, and special case the initial
    274  // chunk.
    275  uint32_t framePushedForHeight(uint32_t logicalHeight) {
    276    if (logicalHeight <= fixedAllocSize()) {
    277      return fixedAllocSize();
    278    }
    279    return fixedAllocSize() +
    280           AlignBytes(logicalHeight - fixedAllocSize(), ChunkSize);
    281  }
    282 #endif
    283 
    284 protected:
    285  //////////////////////////////////////////////////////////////////////
    286  //
    287  // The Dynamic area - the dynamic part of the frame, for spilling and saving
    288  // intermediate values.
    289 
    290  // Offset off of sp_ for the slot at stack area location `offset`.
    291 
    292  int32_t stackOffset(int32_t offset) {
    293    MOZ_ASSERT(offset > 0);
    294    return masm.framePushed() - offset;
    295  }
    296 
    297  uint32_t computeHeightWithStackResults(StackHeight stackBase,
    298                                         uint32_t stackResultBytes) {
    299    MOZ_ASSERT(stackResultBytes);
    300    MOZ_ASSERT(currentStackHeight() >= stackBase.height);
    301    return stackBase.height + stackResultBytes;
    302  }
    303 
    304 #ifdef RABALDR_CHUNKY_STACK
    305  void pushChunkyBytes(uint32_t bytes) {
    306    checkChunkyInvariants();
    307    uint32_t freeSpace = masm.framePushed() - currentStackHeight_;
    308    if (freeSpace < bytes) {
    309      uint32_t bytesToReserve = AlignBytes(bytes - freeSpace, ChunkSize);
    310      MOZ_ASSERT(bytesToReserve + freeSpace >= bytes);
    311      masm.reserveStack(bytesToReserve);
    312    }
    313    currentStackHeight_ += bytes;
    314    checkChunkyInvariants();
    315  }
    316 
    317  void popChunkyBytes(uint32_t bytes) {
    318    checkChunkyInvariants();
    319    currentStackHeight_ -= bytes;
    320    // Sometimes, popChunkyBytes() is used to pop a larger area, as when we drop
    321    // values consumed by a call, and we may need to drop several chunks.  But
    322    // never drop the initial chunk.  Crucially, the amount we drop is always an
    323    // integral number of chunks.
    324    uint32_t freeSpace = masm.framePushed() - currentStackHeight_;
    325    if (freeSpace >= ChunkSize) {
    326      uint32_t targetAllocSize = framePushedForHeight(currentStackHeight_);
    327      uint32_t amountToFree = masm.framePushed() - targetAllocSize;
    328      MOZ_ASSERT(amountToFree % ChunkSize == 0);
    329      if (amountToFree) {
    330        masm.freeStack(amountToFree);
    331      }
    332    }
    333    checkChunkyInvariants();
    334  }
    335 #endif
    336 
    337  uint32_t currentStackHeight() const {
    338 #ifdef RABALDR_CHUNKY_STACK
    339    return currentStackHeight_;
    340 #else
    341    return masm.framePushed();
    342 #endif
    343  }
    344 
    345 private:
    346 #ifdef RABALDR_CHUNKY_STACK
    347  void checkChunkyInvariants() {
    348    MOZ_ASSERT(masm.framePushed() >= fixedAllocSize());
    349    MOZ_ASSERT(masm.framePushed() >= currentStackHeight_);
    350    MOZ_ASSERT(masm.framePushed() == fixedAllocSize() ||
    351               masm.framePushed() - currentStackHeight_ < ChunkSize);
    352    MOZ_ASSERT((masm.framePushed() - localSize_) % ChunkSize == 0);
    353  }
    354 #endif
    355 
    356  // For a given stack height, return the appropriate size of the allocated
    357  // frame.
    358 
    359  uint32_t framePushedForHeight(StackHeight stackHeight) {
    360 #ifdef RABALDR_CHUNKY_STACK
    361    // A more complicated adjustment is needed.
    362    return framePushedForHeight(stackHeight.height);
    363 #else
    364    // The allocated frame size equals the stack height.
    365    return stackHeight.height;
    366 #endif
    367  }
    368 
    369 public:
    370  // The current height of the stack area, not necessarily zero-based, in a
    371  // type-safe way.
    372 
    373  StackHeight stackHeight() const { return StackHeight(currentStackHeight()); }
    374 
    375  // Set the frame height to a previously recorded value.
    376 
    377  void setStackHeight(StackHeight amount) {
    378 #ifdef RABALDR_CHUNKY_STACK
    379    currentStackHeight_ = amount.height;
    380    masm.setFramePushed(framePushedForHeight(amount));
    381    checkChunkyInvariants();
    382 #else
    383    masm.setFramePushed(amount.height);
    384 #endif
    385  }
    386 
    387  // The current height of the dynamic part of the stack area (ie, the backing
    388  // store for the evaluation stack), zero-based.
    389 
    390  uint32_t dynamicHeight() const { return currentStackHeight() - localSize_; }
    391 
    392  // Before branching to an outer control label, pop the execution stack to
    393  // the level expected by that region, but do not update masm.framePushed()
    394  // as that will happen as compilation leaves the block.
    395  //
    396  // Note these operate directly on the stack pointer register.
    397 
    398  void popStackBeforeBranch(StackHeight destStackHeight,
    399                            uint32_t stackResultBytes) {
    400    uint32_t framePushedHere = masm.framePushed();
    401    StackHeight heightThere =
    402        StackHeight(destStackHeight.height + stackResultBytes);
    403    uint32_t framePushedThere = framePushedForHeight(heightThere);
    404    if (framePushedHere > framePushedThere) {
    405      masm.addToStackPtr(Imm32(framePushedHere - framePushedThere));
    406    }
    407  }
    408 
    409  void popStackBeforeBranch(StackHeight destStackHeight, ResultType type) {
    410    popStackBeforeBranch(destStackHeight,
    411                         ABIResultIter::MeasureStackBytes(type));
    412  }
    413 
    414  // Given that there are |stackParamSize| bytes on the dynamic stack
    415  // corresponding to the stack results, return the stack height once these
    416  // parameters are popped.
    417 
    418  StackHeight stackResultsBase(uint32_t stackParamSize) {
    419    return StackHeight(currentStackHeight() - stackParamSize);
    420  }
    421 
    422  // For most of WebAssembly, adjacent instructions have fallthrough control
    423  // flow between them, which allows us to simply thread the current stack
    424  // height through the compiler.  There are two exceptions to this rule: when
    425  // leaving a block via dead code, and when entering the "else" arm of an "if".
    426  // In these cases, the stack height is the block entry height, plus any stack
    427  // values (results in the block exit case, parameters in the else entry case).
    428 
    429  void resetStackHeight(StackHeight destStackHeight, ResultType type) {
    430    uint32_t height = destStackHeight.height;
    431    height += ABIResultIter::MeasureStackBytes(type);
    432    setStackHeight(StackHeight(height));
    433  }
    434 
    435  // Return offset of stack result.
    436 
    437  uint32_t locateStackResult(const ABIResult& result, StackHeight stackBase,
    438                             uint32_t stackResultBytes) {
    439    MOZ_ASSERT(result.onStack());
    440    MOZ_ASSERT(result.stackOffset() + result.size() <= stackResultBytes);
    441    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    442    return end - result.stackOffset();
    443  }
    444 
    445 public:
    446  //////////////////////////////////////////////////////////////////////
    447  //
    448  // The Argument area - for outgoing calls.
    449  //
    450  // We abstract these operations as an optimization: we can merge the freeing
    451  // of the argument area and dropping values off the stack after a call.  But
    452  // they always amount to manipulating the real stack pointer by some amount.
    453  //
    454  // Note that we do not update currentStackHeight_ for this; the frame does
    455  // not know about outgoing arguments.  But we do update framePushed(), so we
    456  // can still index into the frame below the outgoing arguments area.
    457 
    458  // This is always equivalent to a masm.reserveStack() call.
    459 
    460  void allocArgArea(size_t argSize) {
    461    if (argSize) {
    462      masm.reserveStack(argSize);
    463    }
    464  }
    465 
    466  // This frees the argument area allocated by allocArgArea(), and `argSize`
    467  // must be equal to the `argSize` argument to allocArgArea().  In addition
    468  // we drop some values from the frame, corresponding to the values that were
    469  // consumed by the call.
    470 
    471  void freeArgAreaAndPopBytes(size_t argSize, size_t dropSize) {
    472    // The method is called to re-initialize SP after the call. Note that
    473    // this operation shall not be optimized for argSize + dropSize == 0.
    474 #ifdef RABALDR_CHUNKY_STACK
    475    // Freeing the outgoing arguments and freeing the consumed values have
    476    // different semantics here, which is why the operation is split.
    477    masm.freeStackTo(masm.framePushed() - argSize);
    478    popChunkyBytes(dropSize);
    479 #else
    480    masm.freeStackTo(masm.framePushed() - (argSize + dropSize));
    481 #endif
    482  }
    483 };
    484 
    485 class BaseStackFrame final : public BaseStackFrameAllocator {
    486  MacroAssembler& masm;
    487 
    488  // The largest observed value of masm.framePushed(), ie, the size of the
    489  // stack frame.  Read this for its true value only when code generation is
    490  // finished.
    491  uint32_t maxFramePushed_;
    492 
    493  // Patch point where we check for stack overflow.
    494  CodeOffset stackAddOffset_;
    495 
    496  // Low byte offset of pointer to stack results, if any.
    497  mozilla::Maybe<int32_t> stackResultsPtrOffset_;
    498 
    499  // The offset of instance pointer.
    500  uint32_t instancePointerOffset_;
    501 
    502  // Low byte offset of local area for true locals (not parameters).
    503  uint32_t varLow_;
    504 
    505  // High byte offset + 1 of local area for true locals.
    506  uint32_t varHigh_;
    507 
    508  // The stack pointer, cached for brevity.
    509  RegisterOrSP sp_;
    510 
    511 public:
    512  explicit BaseStackFrame(MacroAssembler& masm)
    513      : BaseStackFrameAllocator(masm),
    514        masm(masm),
    515        maxFramePushed_(0),
    516        stackAddOffset_(0),
    517        instancePointerOffset_(UINT32_MAX),
    518        varLow_(UINT32_MAX),
    519        varHigh_(UINT32_MAX),
    520        sp_(masm.getStackPointer()) {}
    521 
    522  ///////////////////////////////////////////////////////////////////////////
    523  //
    524  // Stack management and overflow checking
    525 
    526  // This must be called once beginFunction has allocated space for the Header
    527  // (the Frame and DebugFrame) and the Local area, and will record the current
    528  // frame size for internal use by the stack abstractions.
    529 
    530  void onFixedStackAllocated() {
    531    maxFramePushed_ = masm.framePushed();
    532    BaseStackFrameAllocator::onFixedStackAllocated();
    533  }
    534 
    535  // We won't know until after we've generated code how big the frame will be
    536  // (we may need arbitrary spill slots and outgoing param slots) so emit a
    537  // patchable add that is patched in endFunction().
    538  //
    539  // Note the platform scratch register may be used by branchPtr(), so
    540  // generally tmp must be something else.
    541 
    542  void checkStack(Register tmp1, Register tmp2, Label* stackOverflowTrap) {
    543    masm.loadPtr(Address(InstanceReg, wasm::Instance::offsetOfCx()), tmp2);
    544    stackAddOffset_ = masm.sub32FromStackPtrWithPatch(tmp1);
    545    masm.branchPtr(Assembler::AboveOrEqual,
    546                   Address(tmp2, JSContext::offsetOfWasm() +
    547                                     wasm::Context::offsetOfStackLimit()),
    548                   tmp1, stackOverflowTrap);
    549  }
    550 
    551  void patchCheckStack() {
    552    masm.patchSub32FromStackPtr(stackAddOffset_,
    553                                Imm32(int32_t(maxFramePushed_)));
    554  }
    555 
    556  // Very large frames are implausible, probably an attack.
    557 
    558  bool checkStackHeight() { return maxFramePushed_ <= MaxFrameSize; }
    559 
    560  ///////////////////////////////////////////////////////////////////////////
    561  //
    562  // Local area
    563 
    564  struct Local {
    565    // Type of the value.
    566    const MIRType type;
    567 
    568    // Byte offset from Frame "into" the locals, ie positive for true locals
    569    // and negative for incoming args that read directly from the arg area.
    570    // It assumes the stack is growing down and that locals are on the stack
    571    // at lower addresses than Frame, and is the offset from Frame of the
    572    // lowest-addressed byte of the local.
    573    const int32_t offs;
    574 
    575    Local(MIRType type, int32_t offs) : type(type), offs(offs) {}
    576 
    577    bool isStackArgument() const { return offs < 0; }
    578  };
    579 
    580  // Profiling shows that the number of parameters and locals frequently
    581  // touches or exceeds 8.  So 16 seems like a reasonable starting point.
    582  using LocalVector = Vector<Local, 16, SystemAllocPolicy>;
    583 
    584  // Initialize `localInfo` based on the types of `locals` and `args`.
    585  [[nodiscard]] bool setupLocals(const ValTypeVector& locals,
    586                                 const ArgTypeVector& args, bool debugEnabled,
    587                                 LocalVector* localInfo) {
    588    if (!localInfo->reserve(locals.length())) {
    589      return false;
    590    }
    591 
    592    mozilla::DebugOnly<uint32_t> index = 0;
    593    BaseLocalIter i(locals, args, debugEnabled);
    594    for (; !i.done() && i.index() < args.lengthWithoutStackResults(); i++) {
    595      MOZ_ASSERT(i.isArg());
    596      MOZ_ASSERT(i.index() == index);
    597      localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
    598      index++;
    599    }
    600 
    601    varLow_ = i.frameSize();
    602    for (; !i.done(); i++) {
    603      MOZ_ASSERT(!i.isArg());
    604      MOZ_ASSERT(i.index() == index);
    605      localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
    606      index++;
    607    }
    608    varHigh_ = i.frameSize();
    609 
    610    // Reserve an additional stack slot for the instance pointer.
    611    const uint32_t pointerAlignedVarHigh = AlignBytes(varHigh_, sizeof(void*));
    612    const uint32_t localSize = pointerAlignedVarHigh + sizeof(void*);
    613    instancePointerOffset_ = localSize;
    614 
    615    setLocalSize(AlignBytes(localSize, WasmStackAlignment));
    616 
    617    if (args.hasSyntheticStackResultPointerArg()) {
    618      stackResultsPtrOffset_ = mozilla::Some(i.stackResultPointerOffset());
    619    }
    620 
    621    return true;
    622  }
    623 
    624  void zeroLocals(BaseRegAlloc* ra);
    625 
    626  Address addressOfLocal(const Local& local, uint32_t additionalOffset = 0) {
    627    if (local.isStackArgument()) {
    628      return Address(FramePointer,
    629                     stackArgumentOffsetFromFp(local) + additionalOffset);
    630    }
    631    return Address(sp_, localOffsetFromSp(local) + additionalOffset);
    632  }
    633 
    634  void loadLocalI32(const Local& src, RegI32 dest) {
    635    masm.load32(addressOfLocal(src), dest);
    636  }
    637 
    638 #ifndef JS_PUNBOX64
    639  void loadLocalI64Low(const Local& src, RegI32 dest) {
    640    masm.load32(addressOfLocal(src, INT64LOW_OFFSET), dest);
    641  }
    642 
    643  void loadLocalI64High(const Local& src, RegI32 dest) {
    644    masm.load32(addressOfLocal(src, INT64HIGH_OFFSET), dest);
    645  }
    646 #endif
    647 
    648  void loadLocalI64(const Local& src, RegI64 dest) {
    649    masm.load64(addressOfLocal(src), dest);
    650  }
    651 
    652  void loadLocalRef(const Local& src, RegRef dest) {
    653    masm.loadPtr(addressOfLocal(src), dest);
    654  }
    655 
    656  void loadLocalF64(const Local& src, RegF64 dest) {
    657    masm.loadDouble(addressOfLocal(src), dest);
    658  }
    659 
    660  void loadLocalF32(const Local& src, RegF32 dest) {
    661    masm.loadFloat32(addressOfLocal(src), dest);
    662  }
    663 
    664 #ifdef ENABLE_WASM_SIMD
    665  void loadLocalV128(const Local& src, RegV128 dest) {
    666    masm.loadUnalignedSimd128(addressOfLocal(src), dest);
    667  }
    668 #endif
    669 
    670  void storeLocalI32(RegI32 src, const Local& dest) {
    671    masm.store32(src, addressOfLocal(dest));
    672  }
    673 
    674  void storeLocalI64(RegI64 src, const Local& dest) {
    675    masm.store64(src, addressOfLocal(dest));
    676  }
    677 
    678  void storeLocalRef(RegRef src, const Local& dest) {
    679    masm.storePtr(src, addressOfLocal(dest));
    680  }
    681 
    682  void storeLocalF64(RegF64 src, const Local& dest) {
    683    masm.storeDouble(src, addressOfLocal(dest));
    684  }
    685 
    686  void storeLocalF32(RegF32 src, const Local& dest) {
    687    masm.storeFloat32(src, addressOfLocal(dest));
    688  }
    689 
    690 #ifdef ENABLE_WASM_SIMD
    691  void storeLocalV128(RegV128 src, const Local& dest) {
    692    masm.storeUnalignedSimd128(src, addressOfLocal(dest));
    693  }
    694 #endif
    695 
    696  // Offset off of sp_ for `local`.
    697  int32_t localOffsetFromSp(const Local& local) {
    698    MOZ_ASSERT(!local.isStackArgument());
    699    return localOffset(local.offs);
    700  }
    701 
    702  // Offset off of frame pointer for `stack argument`.
    703  int32_t stackArgumentOffsetFromFp(const Local& local) {
    704    MOZ_ASSERT(local.isStackArgument());
    705    return -local.offs;
    706  }
    707 
    708  // The incoming stack result area pointer is for stack results of the function
    709  // being compiled.
    710  void loadIncomingStackResultAreaPtr(RegPtr reg) {
    711    const int32_t offset = stackResultsPtrOffset_.value();
    712    Address src = offset < 0 ? Address(FramePointer, -offset)
    713                             : Address(sp_, stackOffset(offset));
    714    masm.loadPtr(src, reg);
    715  }
    716 
    717  void storeIncomingStackResultAreaPtr(RegPtr reg) {
    718    // If we get here, that means the pointer to the stack results area was
    719    // passed in as a register, and therefore it will be spilled below the
    720    // frame, so the offset is a positive height.
    721    MOZ_ASSERT(stackResultsPtrOffset_.value() > 0);
    722    masm.storePtr(reg,
    723                  Address(sp_, stackOffset(stackResultsPtrOffset_.value())));
    724  }
    725 
    726  void loadInstancePtr(Register dst) {
    727    // Sometimes loadInstancePtr is used in context when SP is not sync is FP,
    728    // e.g. just after tail calls returns.
    729    masm.loadPtr(Address(FramePointer, -instancePointerOffset_), dst);
    730  }
    731 
    732  void storeInstancePtr(Register instance) {
    733    masm.storePtr(instance, Address(sp_, stackOffset(instancePointerOffset_)));
    734  }
    735 
    736  int32_t getInstancePtrOffset() { return stackOffset(instancePointerOffset_); }
    737 
    738  // An outgoing stack result area pointer is for stack results of callees of
    739  // the function being compiled.
    740  void computeOutgoingStackResultAreaPtr(const StackResultsLoc& results,
    741                                         RegPtr dest) {
    742    MOZ_ASSERT(results.height() <= masm.framePushed());
    743    uint32_t offsetFromSP = masm.framePushed() - results.height();
    744    masm.moveStackPtrTo(dest);
    745    if (offsetFromSP) {
    746      masm.addPtr(Imm32(offsetFromSP), dest);
    747    }
    748  }
    749 
    750 private:
    751  // Offset off of sp_ for a local with offset `offset` from Frame.
    752  int32_t localOffset(int32_t offset) { return masm.framePushed() - offset; }
    753 
    754 public:
    755  ///////////////////////////////////////////////////////////////////////////
    756  //
    757  // Dynamic area
    758 
    759  static constexpr size_t StackSizeOfPtr = ABIResult::StackSizeOfPtr;
    760  static constexpr size_t StackSizeOfInt64 = ABIResult::StackSizeOfInt64;
    761  static constexpr size_t StackSizeOfFloat = ABIResult::StackSizeOfFloat;
    762  static constexpr size_t StackSizeOfDouble = ABIResult::StackSizeOfDouble;
    763 #ifdef ENABLE_WASM_SIMD
    764  static constexpr size_t StackSizeOfV128 = ABIResult::StackSizeOfV128;
    765 #endif
    766 
    767  // Pushes the register `r` to the stack. This pushes the full 64-bit width on
    768  // 64-bit systems, and 32-bits otherwise.
    769  uint32_t pushGPR(Register r) {
    770    mozilla::DebugOnly<uint32_t> stackBefore = currentStackHeight();
    771 #ifdef RABALDR_CHUNKY_STACK
    772    pushChunkyBytes(StackSizeOfPtr);
    773    masm.storePtr(r, Address(sp_, stackOffset(currentStackHeight())));
    774 #else
    775    masm.Push(r);
    776 #endif
    777    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    778    MOZ_ASSERT(stackBefore + StackSizeOfPtr == currentStackHeight());
    779    return currentStackHeight();
    780  }
    781 
    782  uint32_t pushFloat32(FloatRegister r) {
    783    mozilla::DebugOnly<uint32_t> stackBefore = currentStackHeight();
    784 #ifdef RABALDR_CHUNKY_STACK
    785    pushChunkyBytes(StackSizeOfFloat);
    786    masm.storeFloat32(r, Address(sp_, stackOffset(currentStackHeight())));
    787 #else
    788    masm.Push(r);
    789 #endif
    790    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    791    MOZ_ASSERT(stackBefore + StackSizeOfFloat == currentStackHeight());
    792    return currentStackHeight();
    793  }
    794 
    795 #ifdef ENABLE_WASM_SIMD
    796  uint32_t pushV128(RegV128 r) {
    797    mozilla::DebugOnly<uint32_t> stackBefore = currentStackHeight();
    798 #  ifdef RABALDR_CHUNKY_STACK
    799    pushChunkyBytes(StackSizeOfV128);
    800 #  else
    801    masm.adjustStack(-(int)StackSizeOfV128);
    802 #  endif
    803    masm.storeUnalignedSimd128(r,
    804                               Address(sp_, stackOffset(currentStackHeight())));
    805    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    806    MOZ_ASSERT(stackBefore + StackSizeOfV128 == currentStackHeight());
    807    return currentStackHeight();
    808  }
    809 #endif
    810 
    811  uint32_t pushDouble(FloatRegister r) {
    812    mozilla::DebugOnly<uint32_t> stackBefore = currentStackHeight();
    813 #ifdef RABALDR_CHUNKY_STACK
    814    pushChunkyBytes(StackSizeOfDouble);
    815    masm.storeDouble(r, Address(sp_, stackOffset(currentStackHeight())));
    816 #else
    817    masm.Push(r);
    818 #endif
    819    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    820    MOZ_ASSERT(stackBefore + StackSizeOfDouble == currentStackHeight());
    821    return currentStackHeight();
    822  }
    823 
    824  // Pops the stack into the register `r`. This pops the full 64-bit width on
    825  // 64-bit systems, and 32-bits otherwise.
    826  void popGPR(Register r) {
    827    mozilla::DebugOnly<uint32_t> stackBefore = currentStackHeight();
    828 #ifdef RABALDR_CHUNKY_STACK
    829    masm.loadPtr(Address(sp_, stackOffset(currentStackHeight())), r);
    830    popChunkyBytes(StackSizeOfPtr);
    831 #else
    832    masm.Pop(r);
    833 #endif
    834    MOZ_ASSERT(stackBefore - StackSizeOfPtr == currentStackHeight());
    835  }
    836 
    837  void popFloat32(FloatRegister r) {
    838    mozilla::DebugOnly<uint32_t> stackBefore = currentStackHeight();
    839 #ifdef RABALDR_CHUNKY_STACK
    840    masm.loadFloat32(Address(sp_, stackOffset(currentStackHeight())), r);
    841    popChunkyBytes(StackSizeOfFloat);
    842 #else
    843    masm.Pop(r);
    844 #endif
    845    MOZ_ASSERT(stackBefore - StackSizeOfFloat == currentStackHeight());
    846  }
    847 
    848  void popDouble(FloatRegister r) {
    849    mozilla::DebugOnly<uint32_t> stackBefore = currentStackHeight();
    850 #ifdef RABALDR_CHUNKY_STACK
    851    masm.loadDouble(Address(sp_, stackOffset(currentStackHeight())), r);
    852    popChunkyBytes(StackSizeOfDouble);
    853 #else
    854    masm.Pop(r);
    855 #endif
    856    MOZ_ASSERT(stackBefore - StackSizeOfDouble == currentStackHeight());
    857  }
    858 
    859 #ifdef ENABLE_WASM_SIMD
    860  void popV128(RegV128 r) {
    861    mozilla::DebugOnly<uint32_t> stackBefore = currentStackHeight();
    862    masm.loadUnalignedSimd128(Address(sp_, stackOffset(currentStackHeight())),
    863                              r);
    864 #  ifdef RABALDR_CHUNKY_STACK
    865    popChunkyBytes(StackSizeOfV128);
    866 #  else
    867    masm.adjustStack((int)StackSizeOfV128);
    868 #  endif
    869    MOZ_ASSERT(stackBefore - StackSizeOfV128 == currentStackHeight());
    870  }
    871 #endif
    872 
    873  void popBytes(size_t bytes) {
    874    if (bytes > 0) {
    875 #ifdef RABALDR_CHUNKY_STACK
    876      popChunkyBytes(bytes);
    877 #else
    878      masm.freeStack(bytes);
    879 #endif
    880    }
    881  }
    882 
    883  void loadStackI32(int32_t offset, RegI32 dest) {
    884    masm.load32(Address(sp_, stackOffset(offset)), dest);
    885  }
    886 
    887  void loadStackI64(int32_t offset, RegI64 dest) {
    888    masm.load64(Address(sp_, stackOffset(offset)), dest);
    889  }
    890 
    891 #ifndef JS_PUNBOX64
    892  void loadStackI64Low(int32_t offset, RegI32 dest) {
    893    masm.load32(Address(sp_, stackOffset(offset - INT64LOW_OFFSET)), dest);
    894  }
    895 
    896  void loadStackI64High(int32_t offset, RegI32 dest) {
    897    masm.load32(Address(sp_, stackOffset(offset - INT64HIGH_OFFSET)), dest);
    898  }
    899 #endif
    900 
    901  void loadStackRef(int32_t offset, RegRef dest) {
    902    masm.loadPtr(Address(sp_, stackOffset(offset)), dest);
    903  }
    904 
    905  void loadStackF64(int32_t offset, RegF64 dest) {
    906    masm.loadDouble(Address(sp_, stackOffset(offset)), dest);
    907  }
    908 
    909  void loadStackF32(int32_t offset, RegF32 dest) {
    910    masm.loadFloat32(Address(sp_, stackOffset(offset)), dest);
    911  }
    912 
    913 #ifdef ENABLE_WASM_SIMD
    914  void loadStackV128(int32_t offset, RegV128 dest) {
    915    masm.loadUnalignedSimd128(Address(sp_, stackOffset(offset)), dest);
    916  }
    917 #endif
    918 
    919  uint32_t prepareStackResultArea(StackHeight stackBase,
    920                                  uint32_t stackResultBytes) {
    921    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    922    if (currentStackHeight() < end) {
    923      uint32_t bytes = end - currentStackHeight();
    924 #ifdef RABALDR_CHUNKY_STACK
    925      pushChunkyBytes(bytes);
    926 #else
    927      masm.reserveStack(bytes);
    928 #endif
    929      maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    930    }
    931    return end;
    932  }
    933 
    934  void finishStackResultArea(StackHeight stackBase, uint32_t stackResultBytes) {
    935    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    936    MOZ_ASSERT(currentStackHeight() >= end);
    937    popBytes(currentStackHeight() - end);
    938  }
    939 
    940  // |srcHeight| and |destHeight| are stack heights *including* |bytes|.
    941  void shuffleStackResultsTowardFP(uint32_t srcHeight, uint32_t destHeight,
    942                                   uint32_t bytes, Register temp) {
    943    MOZ_ASSERT(destHeight < srcHeight);
    944    MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
    945    // The shuffleStackResultsTowardFP is used when SP/framePushed is not
    946    // tracked by the compiler, e.g. after possible return call -- use
    947    // FramePointer instead of sp_.
    948    int32_t destOffset = int32_t(-destHeight + bytes);
    949    int32_t srcOffset = int32_t(-srcHeight + bytes);
    950    while (bytes >= sizeof(intptr_t)) {
    951      destOffset -= sizeof(intptr_t);
    952      srcOffset -= sizeof(intptr_t);
    953      bytes -= sizeof(intptr_t);
    954      masm.loadPtr(Address(FramePointer, srcOffset), temp);
    955      masm.storePtr(temp, Address(FramePointer, destOffset));
    956    }
    957    if (bytes) {
    958      MOZ_ASSERT(bytes == sizeof(uint32_t));
    959      destOffset -= sizeof(uint32_t);
    960      srcOffset -= sizeof(uint32_t);
    961      masm.load32(Address(FramePointer, srcOffset), temp);
    962      masm.store32(temp, Address(FramePointer, destOffset));
    963    }
    964  }
    965 
    966  // Unlike the overload that operates on raw heights, |srcHeight| and
    967  // |destHeight| are stack heights *not including* |bytes|.
    968  void shuffleStackResultsTowardFP(StackHeight srcHeight,
    969                                   StackHeight destHeight, uint32_t bytes,
    970                                   Register temp) {
    971    MOZ_ASSERT(srcHeight.isValid());
    972    MOZ_ASSERT(destHeight.isValid());
    973    uint32_t src = computeHeightWithStackResults(srcHeight, bytes);
    974    uint32_t dest = computeHeightWithStackResults(destHeight, bytes);
    975    MOZ_ASSERT(src <= currentStackHeight());
    976    MOZ_ASSERT(dest <= currentStackHeight());
    977    shuffleStackResultsTowardFP(src, dest, bytes, temp);
    978  }
    979 
    980  // |srcHeight| and |destHeight| are stack heights *including* |bytes|.
    981  void shuffleStackResultsTowardSP(uint32_t srcHeight, uint32_t destHeight,
    982                                   uint32_t bytes, Register temp) {
    983    MOZ_ASSERT(destHeight > srcHeight);
    984    MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
    985    uint32_t destOffset = stackOffset(destHeight);
    986    uint32_t srcOffset = stackOffset(srcHeight);
    987    while (bytes >= sizeof(intptr_t)) {
    988      masm.loadPtr(Address(sp_, srcOffset), temp);
    989      masm.storePtr(temp, Address(sp_, destOffset));
    990      destOffset += sizeof(intptr_t);
    991      srcOffset += sizeof(intptr_t);
    992      bytes -= sizeof(intptr_t);
    993    }
    994    if (bytes) {
    995      MOZ_ASSERT(bytes == sizeof(uint32_t));
    996      masm.load32(Address(sp_, srcOffset), temp);
    997      masm.store32(temp, Address(sp_, destOffset));
    998    }
    999  }
   1000 
   1001  // Copy results from the top of the current stack frame to an area of memory,
   1002  // and pop the stack accordingly.  `dest` is the address of the low byte of
   1003  // that memory.
   1004  void popStackResultsToMemory(Register dest, uint32_t bytes, Register temp) {
   1005    MOZ_ASSERT(bytes <= currentStackHeight());
   1006    MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
   1007    uint32_t bytesToPop = bytes;
   1008    uint32_t srcOffset = stackOffset(currentStackHeight());
   1009    uint32_t destOffset = 0;
   1010    while (bytes >= sizeof(intptr_t)) {
   1011      masm.loadPtr(Address(sp_, srcOffset), temp);
   1012      masm.storePtr(temp, Address(dest, destOffset));
   1013      destOffset += sizeof(intptr_t);
   1014      srcOffset += sizeof(intptr_t);
   1015      bytes -= sizeof(intptr_t);
   1016    }
   1017    if (bytes) {
   1018      MOZ_ASSERT(bytes == sizeof(uint32_t));
   1019      masm.load32(Address(sp_, srcOffset), temp);
   1020      masm.store32(temp, Address(dest, destOffset));
   1021    }
   1022    popBytes(bytesToPop);
   1023  }
   1024 
   1025  void allocArgArea(size_t argSize) {
   1026    if (argSize) {
   1027      BaseStackFrameAllocator::allocArgArea(argSize);
   1028      maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
   1029    }
   1030  }
   1031 
   1032 private:
   1033  void store32BitsToStack(int32_t imm, uint32_t destHeight, Register temp) {
   1034    masm.move32(Imm32(imm), temp);
   1035    masm.store32(temp, Address(sp_, stackOffset(destHeight)));
   1036  }
   1037 
   1038  void store64BitsToStack(int64_t imm, uint32_t destHeight, Register temp) {
   1039 #ifdef JS_PUNBOX64
   1040    masm.move64(Imm64(imm), Register64(temp));
   1041    masm.store64(Register64(temp), Address(sp_, stackOffset(destHeight)));
   1042 #else
   1043    union {
   1044      int64_t i64;
   1045      int32_t i32[2];
   1046    } bits = {.i64 = imm};
   1047    static_assert(sizeof(bits) == 8);
   1048    store32BitsToStack(bits.i32[0], destHeight, temp);
   1049    store32BitsToStack(bits.i32[1], destHeight - sizeof(int32_t), temp);
   1050 #endif
   1051  }
   1052 
   1053 public:
   1054  void storeImmediatePtrToStack(intptr_t imm, uint32_t destHeight,
   1055                                Register temp) {
   1056 #ifdef JS_PUNBOX64
   1057    static_assert(StackSizeOfPtr == 8);
   1058    store64BitsToStack(imm, destHeight, temp);
   1059 #else
   1060    static_assert(StackSizeOfPtr == 4);
   1061    store32BitsToStack(int32_t(imm), destHeight, temp);
   1062 #endif
   1063  }
   1064 
   1065  void storeImmediateI64ToStack(int64_t imm, uint32_t destHeight,
   1066                                Register temp) {
   1067    store64BitsToStack(imm, destHeight, temp);
   1068  }
   1069 
   1070  void storeImmediateF32ToStack(float imm, uint32_t destHeight, Register temp) {
   1071    union {
   1072      int32_t i32;
   1073      float f32;
   1074    } bits = {.f32 = imm};
   1075    static_assert(sizeof(bits) == 4);
   1076    // Do not store 4 bytes if StackSizeOfFloat == 8.  It's probably OK to do
   1077    // so, but it costs little to store something predictable.
   1078    if (StackSizeOfFloat == 4) {
   1079      store32BitsToStack(bits.i32, destHeight, temp);
   1080    } else {
   1081      store64BitsToStack(uint32_t(bits.i32), destHeight, temp);
   1082    }
   1083  }
   1084 
   1085  void storeImmediateF64ToStack(double imm, uint32_t destHeight,
   1086                                Register temp) {
   1087    union {
   1088      int64_t i64;
   1089      double f64;
   1090    } bits = {.f64 = imm};
   1091    static_assert(sizeof(bits) == 8);
   1092    store64BitsToStack(bits.i64, destHeight, temp);
   1093  }
   1094 
   1095 #ifdef ENABLE_WASM_SIMD
   1096  void storeImmediateV128ToStack(V128 imm, uint32_t destHeight, Register temp) {
   1097    union {
   1098      int32_t i32[4];
   1099      uint8_t bytes[16];
   1100    } bits{};
   1101    static_assert(sizeof(bits) == 16);
   1102    memcpy(bits.bytes, imm.bytes, 16);
   1103    for (unsigned i = 0; i < 4; i++) {
   1104      store32BitsToStack(bits.i32[i], destHeight - i * sizeof(int32_t), temp);
   1105    }
   1106  }
   1107 #endif
   1108 };
   1109 
   1110 //////////////////////////////////////////////////////////////////////////////
   1111 //
   1112 // MachineStackTracker, used for stack-slot pointerness tracking.
   1113 
   1114 // An expensive operation in stack-map creation is copying of the
   1115 // MachineStackTracker (MST) into the final StackMap.  This is done in
   1116 // StackMapGenerator::createStackMap.  Given that this is basically a
   1117 // bit-array copy, it is reasonable to ask whether the two classes could have
   1118 // a more similar representation, so that the copy could then be done with
   1119 // `memcpy`.
   1120 //
// Although in principle feasible, the following complications exist, and so
// for the moment, this has not been done.
   1123 //
   1124 // * StackMap is optimised for compact size (storage) since there will be
   1125 //   many, so it uses a true bitmap.  MST is intended to be fast and simple,
   1126 //   and only one exists at once (per compilation thread).  Doing this would
   1127 //   require MST to use a true bitmap, and hence ..
   1128 //
   1129 // * .. the copying can't be a straight memcpy, since StackMap has entries for
   1130 //   words not covered by MST.  Hence the copy would need to shift bits in
   1131 //   each byte left or right (statistically speaking, in 7 cases out of 8) in
   1132 //   order to ensure no "holes" in the resulting bitmap.
   1133 //
   1134 // * Furthermore the copying would need to logically invert the direction of
   1135 //   the stacks.  For MST, index zero in the vector corresponds to the highest
   1136 //   address in the stack. For StackMap, bit index zero corresponds to the
   1137 //   lowest address in the stack.
   1138 //
   1139 // * Finally, StackMap is a variable-length structure whose size must be known
   1140 //   at creation time.  The size of an MST by contrast isn't known at creation
   1141 //   time -- it grows as the baseline compiler pushes stuff on its value
   1142 //   stack. That's why it has to have vector entry 0 being the highest address.
   1143 //
   1144 // * Although not directly relevant, StackMaps are also created by the via-Ion
   1145 //   compilation routes, by translation from the pre-existing "JS-era"
   1146 //   LSafePoints (CreateStackMapFromLSafepoint).  So if we want to mash
   1147 //   StackMap around to suit baseline better, we also need to ensure it
   1148 //   doesn't break Ion somehow.
   1149 
   1150 class MachineStackTracker {
   1151  // Simulates the machine's stack, with one bool per word.  The booleans are
   1152  // represented as `uint8_t`s so as to guarantee the element size is one
   1153  // byte.  Index zero in this vector corresponds to the highest address in
   1154  // the machine's stack.  The last entry corresponds to what SP currently
   1155  // points at.  This all assumes a grow-down stack.
   1156  //
   1157  // numPtrs_ contains the number of "true" values in vec_, and is therefore
   1158  // redundant.  But it serves as a constant-time way to detect the common
   1159  // case where vec_ holds no "true" values.
   1160  size_t numPtrs_;
   1161  Vector<uint8_t, 64, SystemAllocPolicy> vec_;
   1162 
   1163 public:
   1164  MachineStackTracker() : numPtrs_(0) {}
   1165 
   1166  ~MachineStackTracker() {
   1167 #ifdef DEBUG
   1168    size_t n = 0;
   1169    for (uint8_t b : vec_) {
   1170      n += (b ? 1 : 0);
   1171    }
   1172    MOZ_ASSERT(n == numPtrs_);
   1173 #endif
   1174  }
   1175 
   1176  // Clone this MachineStackTracker, writing the result at |dst|.
   1177  [[nodiscard]] bool cloneTo(MachineStackTracker* dst);
   1178 
   1179  // Notionally push |n| non-pointers on the stack.
   1180  [[nodiscard]] bool pushNonGCPointers(size_t n) {
   1181    return vec_.appendN(uint8_t(false), n);
   1182  }
   1183 
   1184  // Mark the stack slot |offsetFromSP| up from the bottom as holding a
   1185  // pointer.
   1186  void setGCPointer(size_t offsetFromSP) {
   1187    // offsetFromSP == 0 denotes the most recently pushed item, == 1 the
   1188    // second most recently pushed item, etc.
   1189    MOZ_ASSERT(offsetFromSP < vec_.length());
   1190 
   1191    size_t offsetFromTop = vec_.length() - 1 - offsetFromSP;
   1192    numPtrs_ = numPtrs_ + 1 - (vec_[offsetFromTop] ? 1 : 0);
   1193    vec_[offsetFromTop] = uint8_t(true);
   1194  }
   1195 
   1196  // Query the pointerness of the slot |offsetFromSP| up from the bottom.
   1197  bool isGCPointer(size_t offsetFromSP) const {
   1198    MOZ_ASSERT(offsetFromSP < vec_.length());
   1199 
   1200    size_t offsetFromTop = vec_.length() - 1 - offsetFromSP;
   1201    return bool(vec_[offsetFromTop]);
   1202  }
   1203 
   1204  // Return the number of words tracked by this MachineStackTracker.
   1205  size_t length() const { return vec_.length(); }
   1206 
   1207  // Return the number of pointer-typed words tracked by this
   1208  // MachineStackTracker.
   1209  size_t numPtrs() const {
   1210    MOZ_ASSERT(numPtrs_ <= length());
   1211    return numPtrs_;
   1212  }
   1213 
   1214  // Discard all contents, but (per mozilla::Vector::clear semantics) don't
   1215  // free or reallocate any dynamic storage associated with |vec_|.
   1216  void clear() {
   1217    vec_.clear();
   1218    numPtrs_ = 0;
   1219  }
   1220 
   1221  // An iterator that produces indices of reftyped slots, starting at the
   1222  // logical bottom of the (grow-down) stack.  Indices have the same meaning
   1223  // as the arguments to `isGCPointer`.  That is, if this iterator produces a
   1224  // value `i`, then it means that `isGCPointer(i) == true`; if the value `i`
   1225  // is never produced then `isGCPointer(i) == false`.  The values are
   1226  // produced in ascending order.
   1227  //
   1228  // Because most slots are non-reftyped, some effort has been put into
   1229  // skipping over large groups of non-reftyped slots quickly.
   1230  class Iter {
   1231    // Both `bufU8_` and `bufU32_` are made to point to `vec_`s array of
   1232    // `uint8_t`s, so we can scan (backwards) through it either in bytes or
   1233    // 32-bit words.  Recall that the last element in `vec_` pertains to the
   1234    // lowest-addressed word in the machine's grow-down stack, and we want to
   1235    // iterate logically "up" this stack, so we need to iterate backwards
   1236    // through `vec_`.
   1237    //
   1238    // This dual-pointer scheme assumes that the `vec_`s content array is at
   1239    // least 32-bit aligned.
   1240    const uint8_t* bufU8_;
   1241    const uint32_t* bufU32_;
   1242    // The number of elements in `bufU8_`.
   1243    const size_t nElems_;
   1244    // The index in `bufU8_` where the next search should start.
   1245    size_t next_;
   1246 
   1247   public:
   1248    explicit Iter(const MachineStackTracker& mst)
   1249        : bufU8_((uint8_t*)mst.vec_.begin()),
   1250          bufU32_((uint32_t*)mst.vec_.begin()),
   1251          nElems_(mst.vec_.length()),
   1252          next_(mst.vec_.length() - 1) {
   1253      MOZ_ASSERT(uintptr_t(bufU8_) == uintptr_t(bufU32_));
   1254      // Check minimum alignment constraint on the array.
   1255      MOZ_ASSERT(0 == (uintptr_t(bufU8_) & 3));
   1256    }
   1257 
   1258    ~Iter() { MOZ_ASSERT(uintptr_t(bufU8_) == uintptr_t(bufU32_)); }
   1259 
   1260    // It is important, for termination of the search loop in `next()`, that
   1261    // this has the value obtained by subtracting 1 from size_t(0).
   1262    static constexpr size_t FINISHED = ~size_t(0);
   1263    static_assert(FINISHED == size_t(0) - 1);
   1264 
   1265    // Returns the next index `i` for which `isGCPointer(i) == true`.
   1266    size_t get() {
   1267      while (next_ != FINISHED) {
   1268        if (bufU8_[next_]) {
   1269          next_--;
   1270          return nElems_ - 1 - (next_ + 1);
   1271        }
   1272        // Invariant: next_ != FINISHED (so it's still a valid index)
   1273        //       and: bufU8_[next_] == 0
   1274        //            (so we need to move backwards by at least 1)
   1275        //
   1276        // BEGIN optimization -- this could be removed without affecting
   1277        // correctness.
   1278        if ((next_ & 7) == 0) {
   1279          // We're at the "bottom" of the current dual-4-element word.  Check
   1280          // if we can jump backwards by 8.  This saves a conditional branch
   1281          // and a few cycles by ORing two adjacent 32-bit words together,
   1282          // whilst not requiring 64-bit alignment of `bufU32_`.
   1283          while (next_ >= 8 &&
   1284                 (bufU32_[(next_ - 4) >> 2] | bufU32_[(next_ - 8) >> 2]) == 0) {
   1285            next_ -= 8;
   1286          }
   1287        }
   1288        // END optimization
   1289        next_--;
   1290      }
   1291      return FINISHED;
   1292    }
   1293  };
   1294 };
   1295 
   1296 //////////////////////////////////////////////////////////////////////////////
   1297 //
   1298 // StackMapGenerator, which carries all state needed to create stackmaps.
   1299 
   1300 enum class HasDebugFrameWithLiveRefs { No, Maybe };
   1301 
   1302 struct StackMapGenerator {
   1303 private:
   1304  // --- These are constant for the life of the function's compilation ---
   1305 
   1306  // For generating stackmaps, we'll need to know the offsets of registers
   1307  // as saved by the trap exit stub.
   1308  const RegisterOffsets& trapExitLayout_;
   1309  const size_t trapExitLayoutNumWords_;
   1310 
   1311  // Completed stackmaps are added here
   1312  StackMaps* stackMaps_;
   1313 
   1314  // So as to be able to get current offset when creating stackmaps
   1315  const MacroAssembler& masm_;
   1316 
   1317 public:
   1318  // --- These are constant once we've completed beginFunction() ---
   1319 
   1320  // The number of bytes of arguments passed to this function in memory.
   1321  size_t numStackArgBytes;
   1322 
   1323  MachineStackTracker machineStackTracker;  // tracks machine stack pointerness
   1324 
   1325  // This holds masm.framePushed at entry to the function's body.  It is a
   1326  // Maybe because createStackMap needs to know whether or not we're still
   1327  // in the prologue.  It makes a Nothing-to-Some transition just once per
   1328  // function.
   1329  mozilla::Maybe<uint32_t> framePushedAtEntryToBody;
   1330 
   1331  // --- These can change at any point ---
   1332 
   1333  // This holds masm.framePushed at it would be be for a function call
   1334  // instruction, but excluding the stack area used to pass arguments in
   1335  // memory.  That is, for an upcoming function call, this will hold
   1336  //
   1337  //   masm.framePushed() at the call instruction -
   1338  //      StackArgAreaSizeAligned(argumentTypes)
   1339  //
   1340  // This value denotes the lowest-addressed stack word covered by the current
   1341  // function's stackmap.  Words below this point form the highest-addressed
   1342  // area of the callee's stackmap.  Note that all alignment padding above the
   1343  // arguments-in-memory themselves belongs to the callee's stackmap, as return
   1344  // calls will replace the function arguments with a new set of arguments which
   1345  // may have different alignment.
   1346  //
   1347  // When not inside a function call setup/teardown sequence, it is Nothing.
   1348  // It can make Nothing-to/from-Some transitions arbitrarily as we progress
   1349  // through the function body.
   1350  mozilla::Maybe<uint32_t> framePushedExcludingOutboundCallArgs;
   1351 
   1352  // The number of memory-resident, ref-typed entries on the containing
   1353  // BaseCompiler::stk_.
   1354  size_t memRefsOnStk;
   1355 
   1356  // This is a copy of machineStackTracker that is used only within individual
   1357  // calls to createStackMap. It is here only to avoid possible heap allocation
   1358  // costs resulting from making it local to createStackMap().
   1359  MachineStackTracker augmentedMst;
   1360 
   1361  StackMapGenerator(StackMaps* stackMaps, const RegisterOffsets& trapExitLayout,
   1362                    const size_t trapExitLayoutNumWords,
   1363                    const MacroAssembler& masm)
   1364      : trapExitLayout_(trapExitLayout),
   1365        trapExitLayoutNumWords_(trapExitLayoutNumWords),
   1366        stackMaps_(stackMaps),
   1367        masm_(masm),
   1368        numStackArgBytes(0),
   1369        memRefsOnStk(0) {}
   1370 
   1371  // At the beginning of a function, we may have live roots in registers (as
   1372  // arguments) at the point where we perform a stack overflow check.  This
   1373  // method generates the "extra" stackmap entries to describe that, in the
   1374  // case that the check fails and we wind up calling into the wasm exit
   1375  // stub, as generated by GenerateTrapExit().
   1376  //
   1377  // The resulting map must correspond precisely with the stack layout
   1378  // created for the integer registers as saved by (code generated by)
   1379  // GenerateTrapExit().  To do that we use trapExitLayout_ and
   1380  // trapExitLayoutNumWords_, which together comprise a description of the
   1381  // layout and are created by GenerateTrapExitRegisterOffsets().
   1382  [[nodiscard]] bool generateStackmapEntriesForTrapExit(
   1383      const ArgTypeVector& args, ExitStubMapVector* extras);
   1384 
   1385  // Creates a stackmap incorporating pointers from the current operand
   1386  // stack |stk|, incorporating possible extra pointers in |extra| at the
   1387  // lower addressed end, and possibly with the associated frame having a
   1388  // DebugFrame that must be traced, as indicated by |debugFrameWithLiveRefs|.
   1389  [[nodiscard]] bool createStackMap(
   1390      const char* who, const ExitStubMapVector& extras,
   1391      HasDebugFrameWithLiveRefs debugFrameWithLiveRefs, const StkVector& stk,
   1392      wasm::StackMap** result);
   1393 };
   1394 
   1395 }  // namespace wasm
   1396 }  // namespace js
   1397 
   1398 #endif  // wasm_wasm_baseline_frame_h