tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

WasmBCMemory.cpp (94839B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 *
      4 * Copyright 2016 Mozilla Foundation
      5 *
      6 * Licensed under the Apache License, Version 2.0 (the "License");
      7 * you may not use this file except in compliance with the License.
      8 * You may obtain a copy of the License at
      9 *
     10 *     http://www.apache.org/licenses/LICENSE-2.0
     11 *
     12 * Unless required by applicable law or agreed to in writing, software
     13 * distributed under the License is distributed on an "AS IS" BASIS,
     14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 * See the License for the specific language governing permissions and
     16 * limitations under the License.
     17 */
     18 
     19 #include "wasm/WasmBCClass.h"
     20 #include "wasm/WasmBCDefs.h"
     21 #include "wasm/WasmBCRegDefs.h"
     22 #include "wasm/WasmConstants.h"
     23 #include "wasm/WasmMemory.h"
     24 
     25 #include "jit/MacroAssembler-inl.h"
     26 
     27 #include "wasm/WasmBCClass-inl.h"
     28 #include "wasm/WasmBCCodegen-inl.h"
     29 #include "wasm/WasmBCRegDefs-inl.h"
     30 #include "wasm/WasmBCRegMgmt-inl.h"
     31 #include "wasm/WasmBCStkMgmt-inl.h"
     32 
     33 namespace js {
     34 namespace wasm {
     35 
     36 using mozilla::Nothing;
     37 
     38 //////////////////////////////////////////////////////////////////////////////
     39 //
     40 // Heap access subroutines.
     41 
     42 // Bounds check elimination.
     43 //
     44 // We perform BCE on two kinds of address expressions: on constant heap pointers
     45 // that are known to be in the heap or will be handled by the out-of-bounds trap
     46 // handler; and on local variables that have been checked in dominating code
     47 // without being updated since.
     48 //
     49 // For an access through a constant heap pointer + an offset we can eliminate
     50 // the bounds check if the sum of the address and offset is below the sum of the
     51 // minimum memory length and the offset guard length.
     52 //
     53 // For an access through a local variable + an offset we can eliminate the
     54 // bounds check if the local variable has already been checked and has not been
     55 // updated since, and the offset is less than the guard limit.
     56 //
     57 // To track locals for which we can eliminate checks we use a bit vector
     58 // bceSafe_ that has a bit set for those locals whose bounds have been checked
     59 // and which have not subsequently been set.  Initially this vector is zero.
     60 //
     61 // In straight-line code a bit is set when we perform a bounds check on an
     62 // access via the local and is reset when the variable is updated.
     63 //
     64 // In control flow, the bit vector is manipulated as follows.  Each ControlItem
     65 // has a value bceSafeOnEntry, which is the value of bceSafe_ on entry to the
     66 // item, and a value bceSafeOnExit, which is initially ~0.  On a branch (br,
     67 // brIf, brTable), we always AND the branch target's bceSafeOnExit with the
     68 // value of bceSafe_ at the branch point.  On exiting an item by falling out of
     69 // it, provided we're not in dead code, we AND the current value of bceSafe_
     70 // into the item's bceSafeOnExit.  Additional processing depends on the item
     71 // type:
     72 //
     73 //  - After a block, set bceSafe_ to the block's bceSafeOnExit.
     74 //
     75 //  - On loop entry, after pushing the ControlItem, set bceSafe_ to zero; the
     76 //    back edges would otherwise require us to iterate to a fixedpoint.
     77 //
     78 //  - After a loop, the bceSafe_ is left unchanged, because only fallthrough
     79 //    control flow will reach that point and the bceSafe_ value represents the
     80 //    correct state of the fallthrough path.
     81 //
     82 //  - Set bceSafe_ to the ControlItem's bceSafeOnEntry at both the 'then' branch
     83 //    and the 'else' branch.
     84 //
     85 //  - After an if-then-else, set bceSafe_ to the if-then-else's bceSafeOnExit.
     86 //
     87 //  - After an if-then, set bceSafe_ to the if-then's bceSafeOnExit AND'ed with
     88 //    the if-then's bceSafeOnEntry.
     89 //
     90 // Finally, when the debugger allows locals to be mutated we must disable BCE
     91 // for references via a local, by returning immediately from bceCheckLocal if
     92 // compilerEnv_.debugEnabled() is true.
     93 
     94 void BaseCompiler::bceCheckLocal(MemoryAccessDesc* access, AccessCheck* check,
     95                                 uint32_t local) {
     96  // We only eliminate bounds checks for memory 0
     97  if (access->memoryIndex() != 0) {
     98    return;
     99  }
    100 
    101  if (local >= sizeof(BCESet) * 8) {
    102    return;
    103  }
    104 
    105 #ifdef ENABLE_WASM_CUSTOM_PAGE_SIZES
    106  if (codeMeta_.memories[0].pageSize() != PageSize::Standard) {
    107    return;
    108  }
    109 #endif
    110 
    111  uint64_t offsetGuardLimit = GetMaxOffsetGuardLimit(
    112      codeMeta_.hugeMemoryEnabled(0), codeMeta_.memories[0].pageSize());
    113 
    114  if ((bceSafe_ & (BCESet(1) << local)) &&
    115      access->offset64() < offsetGuardLimit) {
    116    check->omitBoundsCheck = true;
    117  }
    118 
    119  // The local becomes safe even if the offset is beyond the guard limit.
    120  bceSafe_ |= (BCESet(1) << local);
    121 }
    122 
    123 void BaseCompiler::bceLocalIsUpdated(uint32_t local) {
    124  if (local >= sizeof(BCESet) * 8) {
    125    return;
    126  }
    127 
    128  bceSafe_ &= ~(BCESet(1) << local);
    129 }
    130 
    131 // Alignment check elimination.
    132 //
    133 // Alignment checks for atomic operations can be omitted if the pointer is a
    134 // constant and the pointer + offset is aligned.  Alignment checking that can't
    135 // be omitted can still be simplified by checking only the pointer if the offset
    136 // is aligned.
    137 //
    138 // (In addition, alignment checking of the pointer can be omitted if the pointer
    139 // has been checked in dominating code, but we don't do that yet.)
    140 
    141 template <>
    142 RegI32 BaseCompiler::popConstMemoryAccess<RegI32>(MemoryAccessDesc* access,
    143                                                  AccessCheck* check) {
    144  MOZ_ASSERT(isMem32(access->memoryIndex()));
    145 
    146  int32_t addrTemp;
    147  MOZ_ALWAYS_TRUE(popConst(&addrTemp));
    148  uint32_t addr = addrTemp;
    149 
    150  uint64_t offsetGuardLimit = GetMaxOffsetGuardLimit(
    151      codeMeta_.hugeMemoryEnabled(access->memoryIndex()),
    152      codeMeta_.memories[access->memoryIndex()].pageSize());
    153 
    154  // Validation ensures that the offset is in 32-bit range, and the calculation
    155  // of the limit cannot overflow due to our choice of HugeOffsetGuardLimit.
    156 #ifdef WASM_SUPPORTS_HUGE_MEMORY
    157  static_assert(MaxMemory32StandardPagesValidation * StandardPageSizeBytes <=
    158                UINT64_MAX - HugeOffsetGuardLimit);
    159 #endif
    160  uint64_t ea = uint64_t(addr) + uint64_t(access->offset32());
    161  uint64_t limit = codeMeta_.memories[access->memoryIndex()].initialLength() +
    162                   offsetGuardLimit;
    163 
    164  check->omitBoundsCheck = ea < limit;
    165  check->omitAlignmentCheck = (ea & (access->byteSize() - 1)) == 0;
    166 
    167  // Fold the offset into the pointer if we can, as this is always
    168  // beneficial.
    169  if (ea <= UINT32_MAX) {
    170    addr = uint32_t(ea);
    171    access->clearOffset();
    172  }
    173 
    174  RegI32 r = needI32();
    175  moveImm32(int32_t(addr), r);
    176  return r;
    177 }
    178 
    179 template <>
    180 RegI64 BaseCompiler::popConstMemoryAccess<RegI64>(MemoryAccessDesc* access,
    181                                                  AccessCheck* check) {
    182  MOZ_ASSERT(isMem64(access->memoryIndex()));
    183 
    184  int64_t addrTemp;
    185  MOZ_ALWAYS_TRUE(popConst(&addrTemp));
    186  uint64_t addr = addrTemp;
    187 
    188  uint64_t offsetGuardLimit = GetMaxOffsetGuardLimit(
    189      codeMeta_.hugeMemoryEnabled(access->memoryIndex()),
    190      codeMeta_.memories[access->memoryIndex()].pageSize());
    191 
    192  mozilla::CheckedUint64 ea(addr);
    193  ea += access->offset64();
    194  mozilla::CheckedUint64 limit(
    195      codeMeta_.memories[access->memoryIndex()].initialLength());
    196  limit += offsetGuardLimit;
    197 
    198  if (ea.isValid() && limit.isValid()) {
    199    check->omitBoundsCheck = ea.value() < limit.value();
    200    check->omitAlignmentCheck = (ea.value() & (access->byteSize() - 1)) == 0;
    201 
    202    // Fold the offset into the pointer if we can, as this is always
    203    // beneficial.
    204    addr = ea.value();
    205    access->clearOffset();
    206  }
    207 
    208  RegI64 r = needI64();
    209  moveImm64(int64_t(addr), r);
    210  return r;
    211 }
    212 
    213 template <typename RegType>
    214 RegType BaseCompiler::popMemoryAccess(MemoryAccessDesc* access,
    215                                      AccessCheck* check) {
    216  check->onlyPointerAlignment =
    217      (access->offset64() & (access->byteSize() - 1)) == 0;
    218 
    219  // If there's a constant it will have the correct type for RegType.
    220  if (hasConst()) {
    221    return popConstMemoryAccess<RegType>(access, check);
    222  }
    223 
    224  // If there's a local it will have the correct type for RegType.
    225  uint32_t local;
    226  if (peekLocal(&local)) {
    227    bceCheckLocal(access, check, local);
    228  }
    229 
    230  return pop<RegType>();
    231 }
    232 
#ifdef JS_64BIT
// Reinterpret an untyped pointer register as the pointer-width integer
// register type: RegI64 on 64-bit targets.
static inline RegI64 RegPtrToRegIntptr(RegPtr r) {
  return RegI64(Register64(Register(r)));
}

#  ifndef WASM_HAS_HEAPREG
// Inverse conversion; only required on platforms without a pinned heap
// register.
static inline RegPtr RegIntptrToRegPtr(RegI64 r) {
  return RegPtr(Register64(r).reg);
}
#  endif
#else
// On 32-bit targets the pointer-width integer register type is RegI32.
static inline RegI32 RegPtrToRegIntptr(RegPtr r) { return RegI32(Register(r)); }

#  ifndef WASM_HAS_HEAPREG
static inline RegPtr RegIntptrToRegPtr(RegI32 r) { return RegPtr(Register(r)); }
#  endif
#endif
    250 
// Push the base address of memory `memoryIndex` onto the value stack as a
// pointer-sized integer.
void BaseCompiler::pushHeapBase(uint32_t memoryIndex) {
  RegPtr heapBase = need<RegPtr>();

#ifdef WASM_HAS_HEAPREG
  // Memory 0's base is pinned in HeapReg on these platforms; just copy it.
  if (memoryIndex == 0) {
    move(RegPtr(HeapReg), heapBase);
    push(RegPtrToRegIntptr(heapBase));
    return;
  }
#endif

  // Otherwise load the base from the instance.  First obtain the instance
  // pointer, either from its pinned register or by reloading it from the
  // frame.
#ifdef RABALDR_PIN_INSTANCE
  movePtr(RegPtr(InstanceReg), heapBase);
#else
  fr.loadInstancePtr(heapBase);
#endif

  uint32_t offset = instanceOffsetOfMemoryBase(memoryIndex);
  masm.loadPtr(Address(heapBase, offset), heapBase);
  push(RegPtrToRegIntptr(heapBase));
}
    272 
// Add `offset` into `ptr` in place and branch to `ok` if the 32-bit addition
// produced no carry; falling through means the effective address overflowed.
void BaseCompiler::branchAddNoOverflow(uint64_t offset, RegI32 ptr, Label* ok) {
  // The invariant holds because ptr is RegI32 - this is m32.
  MOZ_ASSERT(offset <= UINT32_MAX);
  masm.branchAdd32(Assembler::CarryClear, Imm32(uint32_t(offset)), ptr, ok);
}
    278 
// 64-bit counterpart: add `offset` into `ptr` and branch to `ok` when the
// 64-bit addition has no carry-out.
void BaseCompiler::branchAddNoOverflow(uint64_t offset, RegI64 ptr, Label* ok) {
#if defined(JS_64BIT)
  // On 64-bit targets the value lives in a single register.
  masm.branchAddPtr(Assembler::CarryClear, ImmWord(offset), Register64(ptr).reg,
                    ok);
#else
  // On 32-bit targets use the register-pair 64-bit add.
  masm.branchAdd64(Assembler::CarryClear, Imm64(offset), ptr, ok);
#endif
}
    287 
// Branch to `ok` when (ptr & mask) == 0; used for alignment checking.
void BaseCompiler::branchTestLowZero(RegI32 ptr, Imm32 mask, Label* ok) {
  masm.branchTest32(Assembler::Zero, ptr, mask, ok);
}
    291 
// 64-bit variant: only the low bits of the pointer matter for alignment, so
// on 32-bit targets testing the low word is sufficient.
void BaseCompiler::branchTestLowZero(RegI64 ptr, Imm32 mask, Label* ok) {
#ifdef JS_64BIT
  masm.branchTestPtr(Assembler::Zero, Register64(ptr).reg, mask, ok);
#else
  masm.branchTestPtr(Assembler::Zero, ptr.low, mask, ok);
#endif
}
    299 
// Bounds check a 32-bit pointer against a memory whose bounds-check limit may
// exceed 32 bits.  Branches to `ok` when the access is in bounds; otherwise
// falls through (the caller emits the trap).
void BaseCompiler::boundsCheck4GBOrLargerAccess(uint32_t memoryIndex,
                                                unsigned byteSize,
                                                RegPtr instance, RegI32 ptr,
                                                Label* ok) {
#ifdef JS_64BIT
  // Extend the value to 64 bits, check the 64-bit value against the 64-bit
  // bound, then chop back to 32 bits.  On most platform the extending and
  // chopping are no-ops.  It's important that the value we end up with has
  // flowed through the Spectre mask

  // Note, ptr and ptr64 are the same register.
  RegI64 ptr64 = fromI32(ptr);

  // In principle there may be non-zero bits in the upper bits of the
  // register; clear them.
#  ifdef RABALDR_ZERO_EXTENDS
  // The platform guarantees 32-bit operations zero-extend; just assert.
  masm.debugAssertCanonicalInt32(ptr);
#  else
  masm.move32To64ZeroExtend(ptr, ptr64);
#  endif

  // Delegate to the 64-bit check, which also applies any Spectre masking.
  boundsCheck4GBOrLargerAccess(memoryIndex, byteSize, instance, ptr64, ok);

  // Restore the value to the canonical form for a 32-bit value in a
  // 64-bit register and/or the appropriate form for further use in the
  // indexing instruction.
#  ifdef RABALDR_ZERO_EXTENDS
  // The canonical value is zero-extended; we already have that.
#  else
  masm.move64To32(ptr64, ptr);
#  endif
#else
  // No support needed, we have max 2GB heap on 32-bit
  MOZ_CRASH("No 32-bit support");
#endif
}
    336 
// Bounds check a 32-bit pointer against a memory known to be smaller than
// 4GB; branches to `ok` when in bounds.
void BaseCompiler::boundsCheckBelow4GBAccess(uint32_t memoryIndex,
                                             unsigned byteSize, RegPtr instance,
                                             RegI32 ptr, Label* ok) {
  // If the memory's max size is known to be smaller than 64K pages exactly,
  // we can use a 32-bit check and avoid extension and wrapping.
  masm.wasmBoundsCheck32(Assembler::Below, ptr,
                         Address(instance, instanceOffsetOfBoundsCheckLimit(
                                               memoryIndex, byteSize)),
                         ok);
}
    347 
// Bounds check a 64-bit pointer against the instance's 64-bit bounds-check
// limit; branches to `ok` when in bounds.
void BaseCompiler::boundsCheck4GBOrLargerAccess(uint32_t memoryIndex,
                                                unsigned byteSize,
                                                RegPtr instance, RegI64 ptr,
                                                Label* ok) {
  // Any Spectre mitigation will appear to update the ptr64 register.
  masm.wasmBoundsCheck64(Assembler::Below, ptr,
                         Address(instance, instanceOffsetOfBoundsCheckLimit(
                                               memoryIndex, byteSize)),
                         ok);
}
    358 
// 64-bit pointer against a below-4GB memory: simply reuse the general 64-bit
// check.
void BaseCompiler::boundsCheckBelow4GBAccess(uint32_t memoryIndex,
                                             unsigned byteSize, RegPtr instance,
                                             RegI64 ptr, Label* ok) {
  // The bounds check limit is valid to 64 bits, so there's no sense in doing
  // anything complicated here.  There may be optimization paths here in the
  // future and they may differ on 32-bit and 64-bit.
  boundsCheck4GBOrLargerAccess(memoryIndex, byteSize, instance, ptr, ok);
}
    367 
// Make sure the ptr could be used as an index register.
static inline void ToValidIndex(MacroAssembler& masm, RegI32 ptr) {
#if defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \
    defined(JS_CODEGEN_RISCV64)
  // When ptr is used as an index, it will be added to a 64-bit register.
  // So we should explicitly promote ptr to 64-bit. Since now ptr holds a
  // unsigned 32-bit value, we zero-extend it to 64-bit here.
  masm.move32To64ZeroExtend(ptr, Register64(ptr));
#endif
  // On all other platforms a 32-bit register is already a valid index.
}
    378 
    379 static inline void ToValidIndex(MacroAssembler& masm, RegI64 ptr) {}
    380 
// RegAddressType is RegI32 for Memory32 and RegI64 for Memory64.
//
// Emit all dynamic checks required before a memory access: fold an oversized
// static offset into the pointer (trapping on overflow), check alignment for
// atomics, and bounds check unless huge memory or BCE makes that redundant.
// On return `ptr` holds the value to use in the actual access.
template <typename RegAddressType>
void BaseCompiler::prepareMemoryAccess(MemoryAccessDesc* access,
                                       AccessCheck* check, RegPtr instance,
                                       RegAddressType ptr) {
#ifndef ENABLE_WASM_CUSTOM_PAGE_SIZES
  // Without custom page sizes every memory uses the standard page size.
  MOZ_ASSERT(codeMeta_.memories[access->memoryIndex()].pageSize() ==
             PageSize::Standard);
#endif

  uint64_t offsetGuardLimit = GetMaxOffsetGuardLimit(
      codeMeta_.hugeMemoryEnabled(access->memoryIndex()),
      codeMeta_.memories[access->memoryIndex()].pageSize());

  // Fold offset if necessary for further computations.  This is needed when
  // the offset exceeds the guard region, doesn't fit in 32 bits, or when an
  // atomic access must check the alignment of ptr+offset rather than ptr.
  if (access->offset64() >= offsetGuardLimit ||
      access->offset64() > UINT32_MAX ||
      (access->isAtomic() && !check->omitAlignmentCheck &&
       !check->onlyPointerAlignment)) {
    Label ok;
    branchAddNoOverflow(access->offset64(), ptr, &ok);
    trap(Trap::OutOfBounds);
    masm.bind(&ok);
    access->clearOffset();
    check->onlyPointerAlignment = true;
  }

  // Alignment check if required.

  if (access->isAtomic() && !check->omitAlignmentCheck) {
    MOZ_ASSERT(check->onlyPointerAlignment);
    // We only care about the low pointer bits here.
    Label ok;
    branchTestLowZero(ptr, Imm32(access->byteSize() - 1), &ok);
    trap(Trap::UnalignedAccess);
    masm.bind(&ok);
  }

  // Ensure no instance if we don't need it.

  if (codeMeta_.hugeMemoryEnabled(access->memoryIndex()) &&
      access->memoryIndex() == 0) {
    // We have HeapReg and no bounds checking and need load neither
    // memoryBase nor boundsCheckLimit from instance.
    MOZ_ASSERT_IF(check->omitBoundsCheck, instance.isInvalid());
  }
#ifdef WASM_HAS_HEAPREG
  // We have HeapReg and don't need to load the memoryBase from instance.
  MOZ_ASSERT_IF(check->omitBoundsCheck && access->memoryIndex() == 0,
                instance.isInvalid());
#endif

  // Bounds check if required.

#ifdef ENABLE_WASM_CUSTOM_PAGE_SIZES
  // Huge memory is never used together with custom page sizes.
  MOZ_ASSERT_IF(codeMeta_.memories[access->memoryIndex()].pageSize() !=
                    PageSize::Standard,
                !codeMeta_.hugeMemoryEnabled(access->memoryIndex()));
#endif

  if (!codeMeta_.hugeMemoryEnabled(access->memoryIndex()) &&
      !check->omitBoundsCheck) {
    Label ok;
#ifdef JS_64BIT
    // The checking depends on how many bits are in the pointer and how many
    // bits are in the bound.
    if (!codeMeta_.memories[access->memoryIndex()]
             .boundsCheckLimitIsAlways32Bits() &&
        MaxMemoryBytes(codeMeta_.memories[access->memoryIndex()].addressType(),
                       codeMeta_.memories[access->memoryIndex()].pageSize()) >=
            0x100000000) {
      boundsCheck4GBOrLargerAccess(access->memoryIndex(), access->byteSize(),
                                   instance, ptr, &ok);
    } else {
      boundsCheckBelow4GBAccess(access->memoryIndex(), access->byteSize(),
                                instance, ptr, &ok);
    }
#else
    boundsCheckBelow4GBAccess(access->memoryIndex(), access->byteSize(),
                              instance, ptr, &ok);
#endif
    trap(Trap::OutOfBounds);
    masm.bind(&ok);
  }

  ToValidIndex(masm, ptr);
}
    468 
    469 template <typename RegAddressType>
    470 void BaseCompiler::computeEffectiveAddress(MemoryAccessDesc* access) {
    471  if (access->offset64()) {
    472    Label ok;
    473    RegAddressType ptr = pop<RegAddressType>();
    474    branchAddNoOverflow(access->offset64(), ptr, &ok);
    475    trap(Trap::OutOfBounds);
    476    masm.bind(&ok);
    477    access->clearOffset();
    478    push(ptr);
    479  }
    480 }
    481 
// Return a register holding the memory base for `access`, loading it from
// the instance when no pinned register serves.  May return an invalid RegPtr
// on platforms that fold the base into the addressing mode.
RegPtr BaseCompiler::maybeLoadMemoryBaseForAccess(
    RegPtr instance, const MemoryAccessDesc* access) {
#ifdef JS_CODEGEN_X86
  // x86 adds the memory base to the wasm pointer directly using an addressing
  // mode and doesn't require the memory base to be loaded to a register.
  return RegPtr();
#endif

#ifdef WASM_HAS_HEAPREG
  // Memory 0's base is pinned in HeapReg.
  if (access->memoryIndex() == 0) {
    return RegPtr(HeapReg);
  }
#endif
  // General case: load the base out of the instance.
  RegPtr memoryBase = needPtr();
  uint32_t offset = instanceOffsetOfMemoryBase(access->memoryIndex());
  masm.loadPtr(Address(instance, offset), memoryBase);
  return memoryBase;
}
    500 
// Decide whether this access needs the instance pointer, i.e. whether the
// memory base or bounds-check limit must be loaded from the instance.
bool BaseCompiler::needInstanceForAccess(const MemoryAccessDesc* access,
                                         const AccessCheck& check) {
#ifndef WASM_HAS_HEAPREG
  // Platform requires instance for memory base.
  return true;
#else
  if (access->memoryIndex() != 0) {
    // Need instance to load the memory base
    return true;
  }
  // Memory 0 with HeapReg: the instance is needed only when a bounds check
  // (which reads the limit from the instance) will actually be emitted.
  return !codeMeta_.hugeMemoryEnabled(access->memoryIndex()) &&
         !check.omitBoundsCheck;
#endif
}
    515 
// Materialize the instance pointer for this access if it is needed, else
// return an invalid RegPtr.
RegPtr BaseCompiler::maybeLoadInstanceForAccess(const MemoryAccessDesc* access,
                                                const AccessCheck& check) {
  if (needInstanceForAccess(access, check)) {
#ifdef RABALDR_PIN_INSTANCE
    // NOTE, returning InstanceReg here depends for correctness on *ALL*
    // clients not attempting to free this register and not push it on the value
    // stack.
    //
    // We have assertions in place to guard against that, so the risk of the
    // leaky abstraction is acceptable.  performRegisterLeakCheck() will ensure
    // that after every bytecode, the union of available registers from the
    // regalloc and used registers from the stack equals the set of allocatable
    // registers at startup.  Thus if the instance is freed incorrectly it will
    // end up in that union via the regalloc, and if it is pushed incorrectly it
    // will end up in the union via the stack.
    return RegPtr(InstanceReg);
#else
    // Allocate a register and reload the instance from the frame.
    RegPtr instance = need<RegPtr>();
    fr.loadInstancePtr(instance);
    return instance;
#endif
  }
  return RegPtr::Invalid();
}
    540 
// As above, but the caller supplies the register to hold the instance; the
// register is written only when the instance is actually needed.
RegPtr BaseCompiler::maybeLoadInstanceForAccess(const MemoryAccessDesc* access,
                                                const AccessCheck& check,
                                                RegPtr specific) {
  if (needInstanceForAccess(access, check)) {
#ifdef RABALDR_PIN_INSTANCE
    movePtr(RegPtr(InstanceReg), specific);
#else
    fr.loadInstancePtr(specific);
#endif
    return specific;
  }
  return RegPtr::Invalid();
}
    554 
    555 //////////////////////////////////////////////////////////////////////////////
    556 //
    557 // Load and store.
    558 
// Emit the actual load instruction(s) for a prepared access.  `ptr` has
// already passed prepareMemoryAccess; `temp` is used only on platforms whose
// unaligned-access path needs it; `memoryBase`/`instance` validity is
// platform-dependent (see the per-platform assertions below).
void BaseCompiler::executeLoad(MemoryAccessDesc* access, AccessCheck* check,
                               RegPtr instance, RegPtr memoryBase, RegI32 ptr,
                               AnyReg dest, RegI32 temp) {
  // Emit the load. At this point, 64-bit offsets will have been folded away by
  // prepareMemoryAccess.
#if defined(JS_CODEGEN_X64)
  MOZ_ASSERT(temp.isInvalid());
  // x64 folds base + ptr + offset into a single addressing mode.
  Operand srcAddr(memoryBase, ptr, TimesOne, access->offset32());

  if (dest.tag == AnyReg::I64) {
    masm.wasmLoadI64(*access, srcAddr, dest.i64());
  } else {
    masm.wasmLoad(*access, srcAddr, dest.any());
  }
#elif defined(JS_CODEGEN_X86)
  MOZ_ASSERT(memoryBase.isInvalid() && temp.isInvalid());
  // x86 has no spare register for the base: add it into ptr directly from
  // the instance, destroying ptr.
  masm.addPtr(
      Address(instance, instanceOffsetOfMemoryBase(access->memoryIndex())),
      ptr);
  Operand srcAddr(ptr, access->offset32());

  if (dest.tag == AnyReg::I64) {
    MOZ_ASSERT(dest.i64() == specific_.abiReturnRegI64);
    masm.wasmLoadI64(*access, srcAddr, dest.i64());
  } else {
    // For 8 bit loads, this will generate movsbl or movzbl, so
    // there's no constraint on what the output register may be.
    masm.wasmLoad(*access, srcAddr, dest.any());
  }
#elif defined(JS_CODEGEN_MIPS64)
  // MIPS64 needs a dedicated slow path for unaligned accesses.
  if (IsUnaligned(*access)) {
    switch (dest.tag) {
      case AnyReg::I64:
        masm.wasmUnalignedLoadI64(*access, memoryBase, ptr, ptr, dest.i64(),
                                  temp);
        break;
      case AnyReg::F32:
        masm.wasmUnalignedLoadFP(*access, memoryBase, ptr, ptr, dest.f32(),
                                 temp);
        break;
      case AnyReg::F64:
        masm.wasmUnalignedLoadFP(*access, memoryBase, ptr, ptr, dest.f64(),
                                 temp);
        break;
      case AnyReg::I32:
        masm.wasmUnalignedLoad(*access, memoryBase, ptr, ptr, dest.i32(), temp);
        break;
      default:
        MOZ_CRASH("Unexpected type");
    }
  } else {
    if (dest.tag == AnyReg::I64) {
      masm.wasmLoadI64(*access, memoryBase, ptr, ptr, dest.i64());
    } else {
      masm.wasmLoad(*access, memoryBase, ptr, ptr, dest.any());
    }
  }
#elif defined(JS_CODEGEN_ARM)
  MOZ_ASSERT(temp.isInvalid());
  if (dest.tag == AnyReg::I64) {
    masm.wasmLoadI64(*access, memoryBase, ptr, ptr, dest.i64());
  } else {
    masm.wasmLoad(*access, memoryBase, ptr, ptr, dest.any());
  }
#elif defined(JS_CODEGEN_ARM64)
  MOZ_ASSERT(temp.isInvalid());
  if (dest.tag == AnyReg::I64) {
    masm.wasmLoadI64(*access, memoryBase, ptr, dest.i64());
  } else {
    masm.wasmLoad(*access, memoryBase, ptr, dest.any());
  }
#elif defined(JS_CODEGEN_LOONG64)
  MOZ_ASSERT(temp.isInvalid());
  if (dest.tag == AnyReg::I64) {
    masm.wasmLoadI64(*access, memoryBase, ptr, ptr, dest.i64());
  } else {
    masm.wasmLoad(*access, memoryBase, ptr, ptr, dest.any());
  }
#elif defined(JS_CODEGEN_RISCV64)
  MOZ_ASSERT(temp.isInvalid());
  if (dest.tag == AnyReg::I64) {
    masm.wasmLoadI64(*access, memoryBase, ptr, ptr, dest.i64());
  } else {
    masm.wasmLoad(*access, memoryBase, ptr, ptr, dest.any());
  }
#else
  MOZ_CRASH("BaseCompiler platform hook: load");
#endif
}
    648 
// ptr and dest may be the same iff dest is I32.
// This may destroy ptr even if ptr and dest are not the same.
//
// Memory32 load: emit the access checks, then the load itself.
void BaseCompiler::load(MemoryAccessDesc* access, AccessCheck* check,
                        RegPtr instance, RegPtr memoryBase, RegI32 ptr,
                        AnyReg dest, RegI32 temp) {
  prepareMemoryAccess(access, check, instance, ptr);
  executeLoad(access, check, instance, memoryBase, ptr, dest, temp);
}
    657 
// Memory64 load: after the checks, the 64-bit pointer can safely be narrowed
// to the 32-bit executeLoad path on every supported platform (rationale per
// platform below).
void BaseCompiler::load(MemoryAccessDesc* access, AccessCheck* check,
                        RegPtr instance, RegPtr memoryBase, RegI64 ptr,
                        AnyReg dest, RegI64 temp) {
  prepareMemoryAccess(access, check, instance, ptr);

#if !defined(JS_64BIT)
  // On 32-bit systems we have a maximum 2GB heap and bounds checking has
  // been applied to ensure that the 64-bit pointer is valid.
  return executeLoad(access, check, instance, memoryBase, RegI32(ptr.low), dest,
                     maybeFromI64(temp));
#elif defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM64)
  // On x64 and arm64 the 32-bit code simply assumes that the high bits of the
  // 64-bit pointer register are zero and performs a 64-bit add.  Thus the code
  // generated is the same for the 64-bit and the 32-bit case.
  return executeLoad(access, check, instance, memoryBase, RegI32(ptr.reg), dest,
                     maybeFromI64(temp));
#elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64)
  // On mips64 and loongarch64, the 'prepareMemoryAccess' function will make
  // sure that ptr holds a valid 64-bit index value. Thus the code generated in
  // 'executeLoad' is the same for the 64-bit and the 32-bit case.
  return executeLoad(access, check, instance, memoryBase, RegI32(ptr.reg), dest,
                     maybeFromI64(temp));
#elif defined(JS_CODEGEN_RISCV64)
  // RISCV the 'prepareMemoryAccess' function will make
  // sure that ptr holds a valid 64-bit index value. Thus the code generated in
  // 'executeLoad' is the same for the 64-bit and the 32-bit case.
  return executeLoad(access, check, instance, memoryBase, RegI32(ptr.reg), dest,
                     maybeFromI64(temp));
#else
  MOZ_CRASH("Missing platform hook");
#endif
}
    690 
// Emit the platform-specific code for a wasm store.  The bounds check and any
// 64-bit offset folding must already have been done by prepareMemoryAccess;
// `ptr` holds the (possibly checked) heap index and `access->offset32()` the
// remaining constant offset.  `temp` is only used on platforms that need a
// scratch for unaligned accesses (MIPS64); everywhere else it must be invalid.
void BaseCompiler::executeStore(MemoryAccessDesc* access, AccessCheck* check,
                                RegPtr instance, RegPtr memoryBase, RegI32 ptr,
                                AnyReg src, RegI32 temp) {
  // Emit the store. At this point, 64-bit offsets will have been folded away by
  // prepareMemoryAccess.
#if defined(JS_CODEGEN_X64)
  // Base+index addressing handles the access in a single instruction.
  MOZ_ASSERT(temp.isInvalid());
  Operand dstAddr(memoryBase, ptr, TimesOne, access->offset32());

  masm.wasmStore(*access, src.any(), dstAddr);
#elif defined(JS_CODEGEN_X86)
  // x86 has no free register for the memory base, so fold the base (loaded
  // from the instance) into `ptr` and address relative to that.
  MOZ_ASSERT(memoryBase.isInvalid() && temp.isInvalid());
  masm.addPtr(
      Address(instance, instanceOffsetOfMemoryBase(access->memoryIndex())),
      ptr);
  Operand dstAddr(ptr, access->offset32());

  if (access->type() == Scalar::Int64) {
    masm.wasmStoreI64(*access, src.i64(), dstAddr);
  } else {
    // Byte stores on x86 require a register with a byte subregister
    // (eax/ebx/ecx/edx); if the source is not one of those, go through the
    // scratch.
    AnyRegister value;
    ScratchI8 scratch(*this);
    if (src.tag == AnyReg::I64) {
      if (access->byteSize() == 1 && !ra.isSingleByteI32(src.i64().low)) {
        masm.mov(src.i64().low, scratch);
        value = AnyRegister(scratch);
      } else {
        value = AnyRegister(src.i64().low);
      }
    } else if (access->byteSize() == 1 && !ra.isSingleByteI32(src.i32())) {
      masm.mov(src.i32(), scratch);
      value = AnyRegister(scratch);
    } else {
      value = src.any();
    }

    masm.wasmStore(*access, value, dstAddr);
  }
#elif defined(JS_CODEGEN_ARM)
  // The masm interface takes a ptr scratch; `ptr` may be clobbered here.
  MOZ_ASSERT(temp.isInvalid());
  if (access->type() == Scalar::Int64) {
    masm.wasmStoreI64(*access, src.i64(), memoryBase, ptr, ptr);
  } else if (src.tag == AnyReg::I64) {
    // Narrowing store of an i64 value: only the low word is written.
    masm.wasmStore(*access, AnyRegister(src.i64().low), memoryBase, ptr, ptr);
  } else {
    masm.wasmStore(*access, src.any(), memoryBase, ptr, ptr);
  }
#elif defined(JS_CODEGEN_MIPS64)
  // MIPS64 distinguishes aligned and unaligned accesses; the unaligned
  // sequences need the extra temp.
  if (IsUnaligned(*access)) {
    switch (src.tag) {
      case AnyReg::I64:
        masm.wasmUnalignedStoreI64(*access, src.i64(), memoryBase, ptr, ptr,
                                   temp);
        break;
      case AnyReg::F32:
        masm.wasmUnalignedStoreFP(*access, src.f32(), memoryBase, ptr, ptr,
                                  temp);
        break;
      case AnyReg::F64:
        masm.wasmUnalignedStoreFP(*access, src.f64(), memoryBase, ptr, ptr,
                                  temp);
        break;
      case AnyReg::I32:
        masm.wasmUnalignedStore(*access, src.i32(), memoryBase, ptr, ptr, temp);
        break;
      default:
        MOZ_CRASH("Unexpected type");
    }
  } else {
    if (src.tag == AnyReg::I64) {
      masm.wasmStoreI64(*access, src.i64(), memoryBase, ptr, ptr);
    } else {
      masm.wasmStore(*access, src.any(), memoryBase, ptr, ptr);
    }
  }
#elif defined(JS_CODEGEN_ARM64)
  MOZ_ASSERT(temp.isInvalid());
  if (access->type() == Scalar::Int64) {
    masm.wasmStoreI64(*access, src.i64(), memoryBase, ptr);
  } else {
    masm.wasmStore(*access, src.any(), memoryBase, ptr);
  }
#elif defined(JS_CODEGEN_LOONG64)
  MOZ_ASSERT(temp.isInvalid());
  if (access->type() == Scalar::Int64) {
    masm.wasmStoreI64(*access, src.i64(), memoryBase, ptr, ptr);
  } else {
    masm.wasmStore(*access, src.any(), memoryBase, ptr, ptr);
  }
#elif defined(JS_CODEGEN_RISCV64)
  MOZ_ASSERT(temp.isInvalid());
  if (access->type() == Scalar::Int64) {
    masm.wasmStoreI64(*access, src.i64(), memoryBase, ptr, ptr);
  } else {
    masm.wasmStore(*access, src.any(), memoryBase, ptr, ptr);
  }
#else
  MOZ_CRASH("BaseCompiler platform hook: store");
#endif
}
    791 
// Bounds-check (if required) and emit a store through a 32-bit heap index
// (memory32).
// ptr and src must not be the same register.
// This may destroy ptr and src.
void BaseCompiler::store(MemoryAccessDesc* access, AccessCheck* check,
                         RegPtr instance, RegPtr memoryBase, RegI32 ptr,
                         AnyReg src, RegI32 temp) {
  // Fold offsets / perform the bounds check, then emit the store proper.
  prepareMemoryAccess(access, check, instance, ptr);
  executeStore(access, check, instance, memoryBase, ptr, src, temp);
}
    800 
// Bounds-check (if required) and emit a store through a 64-bit heap index
// (memory64).  After prepareMemoryAccess the index is known to be a valid
// pointer-sized value, so the store itself reuses the 32-bit entry point.
void BaseCompiler::store(MemoryAccessDesc* access, AccessCheck* check,
                         RegPtr instance, RegPtr memoryBase, RegI64 ptr,
                         AnyReg src, RegI64 temp) {
  prepareMemoryAccess(access, check, instance, ptr);
  // See comments in load()
#if !defined(JS_64BIT)
  // 32-bit platform: the checked index fits in the low word of the pair.
  return executeStore(access, check, instance, memoryBase, RegI32(ptr.low), src,
                      maybeFromI64(temp));
#elif defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM64) ||    \
    defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \
    defined(JS_CODEGEN_RISCV64)
  // 64-bit platform: reinterpret the single 64-bit register as an I32 reg.
  return executeStore(access, check, instance, memoryBase, RegI32(ptr.reg), src,
                      maybeFromI64(temp));
#else
  MOZ_CRASH("Missing platform hook");
#endif
}
    818 
// Framework for a wasm load: pop the address from the value stack, allocate a
// destination register of the loaded type, materialize the instance and memory
// base if the access needs them, emit the (possibly bounds-checked) load, and
// push the result.  RegType is RegI32 for memory32 and RegI64 for memory64.
template <typename RegType>
void BaseCompiler::doLoadCommon(MemoryAccessDesc* access, AccessCheck check,
                                ValType type) {
  RegPtr instance;
  RegPtr memoryBase;
  RegType temp;
#if defined(JS_CODEGEN_MIPS64)
  // MIPS64 needs a temp for its unaligned-access sequences.
  temp = need<RegType>();
#endif

  switch (type.kind()) {
    case ValType::I32: {
      RegType rp = popMemoryAccess<RegType>(access, &check);
      RegI32 rv = needI32();
      instance = maybeLoadInstanceForAccess(access, check);
      memoryBase = maybeLoadMemoryBaseForAccess(instance, access);
      load(access, &check, instance, memoryBase, rp, AnyReg(rv), temp);
      push(rv);
      free(rp);
      break;
    }
    case ValType::I64: {
      RegI64 rv;
      RegType rp;
#ifdef JS_CODEGEN_X86
      // On x86, reserve the ABI i64 return pair for the result *before*
      // popping the pointer; allocation order matters with so few registers.
      rv = specific_.abiReturnRegI64;
      needI64(rv);
      rp = popMemoryAccess<RegType>(access, &check);
#else
      rp = popMemoryAccess<RegType>(access, &check);
      rv = needI64();
#endif
      instance = maybeLoadInstanceForAccess(access, check);
      memoryBase = maybeLoadMemoryBaseForAccess(instance, access);
      load(access, &check, instance, memoryBase, rp, AnyReg(rv), temp);
      push(rv);
      free(rp);
      break;
    }
    case ValType::F32: {
      RegType rp = popMemoryAccess<RegType>(access, &check);
      RegF32 rv = needF32();
      instance = maybeLoadInstanceForAccess(access, check);
      memoryBase = maybeLoadMemoryBaseForAccess(instance, access);
      load(access, &check, instance, memoryBase, rp, AnyReg(rv), temp);
      push(rv);
      free(rp);
      break;
    }
    case ValType::F64: {
      RegType rp = popMemoryAccess<RegType>(access, &check);
      RegF64 rv = needF64();
      instance = maybeLoadInstanceForAccess(access, check);
      memoryBase = maybeLoadMemoryBaseForAccess(instance, access);
      load(access, &check, instance, memoryBase, rp, AnyReg(rv), temp);
      push(rv);
      free(rp);
      break;
    }
#ifdef ENABLE_WASM_SIMD
    case ValType::V128: {
      RegType rp = popMemoryAccess<RegType>(access, &check);
      RegV128 rv = needV128();
      instance = maybeLoadInstanceForAccess(access, check);
      memoryBase = maybeLoadMemoryBaseForAccess(instance, access);
      load(access, &check, instance, memoryBase, rp, AnyReg(rv), temp);
      push(rv);
      free(rp);
      break;
    }
#endif
    default:
      MOZ_CRASH("load type");
      break;
  }

#ifndef RABALDR_PIN_INSTANCE
  maybeFree(instance);
#endif
#ifdef WASM_HAS_HEAPREG
  // The pinned heap register must never be returned to the allocator.
  if (memoryBase != HeapReg) {
    maybeFree(memoryBase);
  }
#else
  maybeFree(memoryBase);
#endif
  maybeFree(temp);
}
    907 
    908 void BaseCompiler::loadCommon(MemoryAccessDesc* access, AccessCheck check,
    909                              ValType type) {
    910  if (isMem32(access->memoryIndex())) {
    911    doLoadCommon<RegI32>(access, check, type);
    912  } else {
    913    doLoadCommon<RegI64>(access, check, type);
    914  }
    915 }
    916 
// Framework for a wasm store: pop the value and then the address from the
// value stack, materialize the instance and memory base if the access needs
// them, and emit the (possibly bounds-checked) store.  RegType is RegI32 for
// memory32 and RegI64 for memory64.
template <typename RegType>
void BaseCompiler::doStoreCommon(MemoryAccessDesc* access, AccessCheck check,
                                 ValType resultType) {
  RegPtr instance;
  RegPtr memoryBase;
  RegType temp;
#if defined(JS_CODEGEN_MIPS64)
  // MIPS64 needs a temp for its unaligned-access sequences.
  temp = need<RegType>();
#endif

  switch (resultType.kind()) {
    case ValType::I32: {
      // Stack order is (address, value), so the value is popped first.
      RegI32 rv = popI32();
      RegType rp = popMemoryAccess<RegType>(access, &check);
      instance = maybeLoadInstanceForAccess(access, check);
      memoryBase = maybeLoadMemoryBaseForAccess(instance, access);
      store(access, &check, instance, memoryBase, rp, AnyReg(rv), temp);
      free(rp);
      free(rv);
      break;
    }
    case ValType::I64: {
      RegI64 rv = popI64();
      RegType rp = popMemoryAccess<RegType>(access, &check);
      instance = maybeLoadInstanceForAccess(access, check);
      memoryBase = maybeLoadMemoryBaseForAccess(instance, access);
      store(access, &check, instance, memoryBase, rp, AnyReg(rv), temp);
      free(rp);
      free(rv);
      break;
    }
    case ValType::F32: {
      RegF32 rv = popF32();
      RegType rp = popMemoryAccess<RegType>(access, &check);
      instance = maybeLoadInstanceForAccess(access, check);
      memoryBase = maybeLoadMemoryBaseForAccess(instance, access);
      store(access, &check, instance, memoryBase, rp, AnyReg(rv), temp);
      free(rp);
      free(rv);
      break;
    }
    case ValType::F64: {
      RegF64 rv = popF64();
      RegType rp = popMemoryAccess<RegType>(access, &check);
      instance = maybeLoadInstanceForAccess(access, check);
      memoryBase = maybeLoadMemoryBaseForAccess(instance, access);
      store(access, &check, instance, memoryBase, rp, AnyReg(rv), temp);
      free(rp);
      free(rv);
      break;
    }
#ifdef ENABLE_WASM_SIMD
    case ValType::V128: {
      RegV128 rv = popV128();
      RegType rp = popMemoryAccess<RegType>(access, &check);
      instance = maybeLoadInstanceForAccess(access, check);
      memoryBase = maybeLoadMemoryBaseForAccess(instance, access);
      store(access, &check, instance, memoryBase, rp, AnyReg(rv), temp);
      free(rp);
      free(rv);
      break;
    }
#endif
    default:
      MOZ_CRASH("store type");
      break;
  }

#ifndef RABALDR_PIN_INSTANCE
  maybeFree(instance);
#endif
#ifdef WASM_HAS_HEAPREG
  // The pinned heap register must never be returned to the allocator.
  if (memoryBase != HeapReg) {
    maybeFree(memoryBase);
  }
#else
  maybeFree(memoryBase);
#endif
  maybeFree(temp);
}
    997 
    998 void BaseCompiler::storeCommon(MemoryAccessDesc* access, AccessCheck check,
    999                               ValType type) {
   1000  if (isMem32(access->memoryIndex())) {
   1001    doStoreCommon<RegI32>(access, check, type);
   1002  } else {
   1003    doStoreCommon<RegI64>(access, check, type);
   1004  }
   1005 }
   1006 
// Convert something that may contain a heap index into a Register that can be
// used in an access.

static inline Register ToRegister(RegI32 r) { return Register(r); }
#ifdef JS_PUNBOX64
// On 64-bit platforms a RegI64 is a single machine register.
static inline Register ToRegister(RegI64 r) { return r.reg; }
#else
// On 32-bit platforms use the low word of the pair; the high word is
// presumably already validated/folded by prepareMemoryAccess (see the
// comments on the 64-bit load/store paths above).
static inline Register ToRegister(RegI64 r) { return r.low; }
#endif
   1016 
   1017 //////////////////////////////////////////////////////////////////////////////
   1018 //
   1019 // Atomic operations.
   1020 //
   1021 // The atomic operations have very diverse per-platform needs for register
   1022 // allocation and temps.  To handle that, the implementations are structured as
   1023 // a per-operation framework method that calls into platform-specific helpers
   1024 // (usually called PopAndAllocate, Perform, and Deallocate) in a per-operation
   1025 // namespace.  This structure results in a little duplication and boilerplate
   1026 // but is otherwise clean and flexible and keeps code and supporting definitions
   1027 // entirely co-located.
   1028 
   1029 // Some consumers depend on the returned Address not incorporating instance, as
   1030 // instance may be the scratch register.
   1031 //
   1032 // RegAddressType is RegI32 for Memory32 and RegI64 for Memory64.
// Prepare an atomic access: bounds-check the index, then fold the memory base
// into `ptr` so the returned Address is (ptr + constant offset).  The Address
// deliberately does not reference `instance`, as the instance may live in a
// scratch register on some platforms.  `ptr` is clobbered.
template <typename RegAddressType>
Address BaseCompiler::prepareAtomicMemoryAccess(MemoryAccessDesc* access,
                                                AccessCheck* check,
                                                RegPtr instance,
                                                RegAddressType ptr) {
  MOZ_ASSERT(needInstanceForAccess(access, *check) == instance.isValid());
  prepareMemoryAccess(access, check, instance, ptr);

#ifdef WASM_HAS_HEAPREG
  // Memory 0's base is pinned in HeapReg; other memories' bases must be
  // loaded from the instance.
  if (access->memoryIndex() == 0) {
    masm.addPtr(HeapReg, ToRegister(ptr));
  } else {
    masm.addPtr(
        Address(instance, instanceOffsetOfMemoryBase(access->memoryIndex())),
        ToRegister(ptr));
  }
#else
  masm.addPtr(
      Address(instance, instanceOffsetOfMemoryBase(access->memoryIndex())),
      ToRegister(ptr));
#endif

  // At this point, 64-bit offsets will have been folded away by
  // prepareMemoryAccess.
  return Address(ToRegister(ptr), access->offset32());
}
   1059 
#ifndef WASM_HAS_HEAPREG
#  ifdef JS_CODEGEN_X86
// On platforms without a pinned heap register (x86 only, at present) the
// atomic paths use this dedicated scratch; the 64-bit atomic helpers below
// assert it is ebx, which they also use as part of the ecx:ebx operand pair.
using ScratchAtomicNoHeapReg = ScratchEBX;
#  else
#    error "Unimplemented porting interface"
#  endif
#endif
   1067 
   1068 //////////////////////////////////////////////////////////////////////////////
   1069 //
   1070 // Atomic load and store.
   1071 
// Per-platform register allocation for 64-bit atomic loads on 32-bit
// platforms.  Allocate reserves the result (and any temp); Deallocate frees
// whatever Allocate reserved beyond the result register.
namespace atomic_load64 {

#ifdef JS_CODEGEN_ARM

static void Allocate(BaseCompiler* bc, RegI64* rd, RegI64*) {
  // The result must be an adjacent register pair, as required by the ARM
  // exclusive-load sequence (see the ldrex/strexd comments further below).
  *rd = bc->needI64Pair();
}

static void Deallocate(BaseCompiler* bc, RegI64) {}

#elif defined JS_CODEGEN_X86

static void Allocate(BaseCompiler* bc, RegI64* rd, RegI64* temp) {
  // The result is in edx:eax, and we need ecx:ebx as a temp.  But ebx will also
  // be used as a scratch, so don't manage that here.
  bc->needI32(bc->specific_.ecx);
  *temp = bc->specific_.ecx_ebx;
  bc->needI64(bc->specific_.edx_eax);
  *rd = bc->specific_.edx_eax;
}

static void Deallocate(BaseCompiler* bc, RegI64 temp) {
  // See comment above.
  MOZ_ASSERT(temp.high == js::jit::ecx);
  bc->freeI32(bc->specific_.ecx);
}

#elif defined(__wasi__) || (defined(JS_CODEGEN_NONE) && !defined(JS_64BIT))

// No codegen on these configurations; empty stubs keep the build working.
static void Allocate(BaseCompiler*, RegI64*, RegI64*) {}
static void Deallocate(BaseCompiler*, RegI64) {}

#endif

}  // namespace atomic_load64
   1107 
#if !defined(JS_64BIT)
// 64-bit atomic load for 32-bit platforms, where a plain load cannot cover
// eight bytes in one instruction; uses the platform-specific register setup
// from atomic_load64 and the masm's dedicated 64-bit atomic load.
template <typename RegAddressType>
void BaseCompiler::atomicLoad64(MemoryAccessDesc* access) {
  RegI64 rd, temp;
  atomic_load64::Allocate(this, &rd, &temp);

  AccessCheck check;
  RegAddressType rp = popMemoryAccess<RegAddressType>(access, &check);

#  ifdef WASM_HAS_HEAPREG
  RegPtr instance = maybeLoadInstanceForAccess(access, check);
  auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp);
  masm.wasmAtomicLoad64(*access, memaddr, temp, rd);
#    ifndef RABALDR_PIN_INSTANCE
  maybeFree(instance);
#    endif
#  else
  // No pinned heap register: route the instance through the dedicated atomic
  // scratch so the remaining GPRs stay free for the operation itself.
  ScratchAtomicNoHeapReg scratch(*this);
  RegPtr instance =
      maybeLoadInstanceForAccess(access, check, RegIntptrToRegPtr(scratch));
  auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp);
  masm.wasmAtomicLoad64(*access, memaddr, temp, rd);
  MOZ_ASSERT(instance == scratch);
#  endif

  free(rp);
  atomic_load64::Deallocate(this, temp);
  pushI64(rd);
}
#endif
   1138 
// Emit an atomic load.  Accesses no wider than a pointer use the ordinary
// load path (the MemoryAccessDesc carries the atomicity); only 8-byte loads
// on 32-bit platforms need the dedicated atomicLoad64 sequence.
void BaseCompiler::atomicLoad(MemoryAccessDesc* access, ValType type) {
  Scalar::Type viewType = access->type();
  if (Scalar::byteSize(viewType) <= sizeof(void*)) {
    loadCommon(access, AccessCheck(), type);
    return;
  }

  MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8);

#if !defined(JS_64BIT)
  if (isMem32(access->memoryIndex())) {
    atomicLoad64<RegI32>(access);
  } else {
    atomicLoad64<RegI64>(access);
  }
#else
  // On 64-bit platforms every atomic load fits the pointer-sized path above.
  MOZ_CRASH("Should not happen");
#endif
}
   1158 
// Emit an atomic store.  Accesses no wider than a pointer use the ordinary
// store path; an 8-byte store on a 32-bit platform is implemented as an
// atomic exchange whose result is discarded (WantResult(false)).
void BaseCompiler::atomicStore(MemoryAccessDesc* access, ValType type) {
  Scalar::Type viewType = access->type();

  if (Scalar::byteSize(viewType) <= sizeof(void*)) {
    storeCommon(access, AccessCheck(), type);
    return;
  }

  MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8);

#if !defined(JS_64BIT)
  if (isMem32(access->memoryIndex())) {
    atomicXchg64<RegI32>(access, WantResult(false));
  } else {
    atomicXchg64<RegI64>(access, WantResult(false));
  }
#else
  // On 64-bit platforms every atomic store fits the pointer-sized path above.
  MOZ_CRASH("Should not happen");
#endif
}
   1179 
   1180 //////////////////////////////////////////////////////////////////////////////
   1181 //
   1182 // Atomic RMW op= operations.
   1183 
   1184 void BaseCompiler::atomicRMW(MemoryAccessDesc* access, ValType type,
   1185                             AtomicOp op) {
   1186  Scalar::Type viewType = access->type();
   1187  if (Scalar::byteSize(viewType) <= 4) {
   1188    if (isMem32(access->memoryIndex())) {
   1189      atomicRMW32<RegI32>(access, type, op);
   1190    } else {
   1191      atomicRMW32<RegI64>(access, type, op);
   1192    }
   1193  } else {
   1194    MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8);
   1195    if (isMem32(access->memoryIndex())) {
   1196      atomicRMW64<RegI32>(access, type, op);
   1197    } else {
   1198      atomicRMW64<RegI64>(access, type, op);
   1199    }
   1200  }
   1201 }
   1202 
// Per-platform helpers for 32-bit-and-narrower atomic RMW: PopAndAllocate
// pops the operand and picks the value/result/temp registers, Perform emits
// the operation, Deallocate releases everything PopAndAllocate reserved
// except the result.
namespace atomic_rmw32 {

#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)

struct Temps {
  // On x86 we use the ScratchI32 for the temp, otherwise we'd run out of
  // registers for 64-bit operations.
#  if defined(JS_CODEGEN_X64)
  RegI32 t0;
#  endif
};

static void PopAndAllocate(BaseCompiler* bc, ValType type,
                           Scalar::Type viewType, AtomicOp op, RegI32* rd,
                           RegI32* rv, Temps* temps) {
  bc->needI32(bc->specific_.eax);
  if (op == AtomicOp::Add || op == AtomicOp::Sub) {
    // We use xadd, so source and destination are the same.  Using
    // eax here is overconstraining, but for byte operations on x86
    // we do need something with a byte register.
    if (type == ValType::I64) {
      *rv = bc->popI64ToSpecificI32(bc->specific_.eax);
    } else {
      *rv = bc->popI32ToSpecific(bc->specific_.eax);
    }
    *rd = *rv;
  } else {
    // We use a cmpxchg loop.  The output must be eax; the input
    // must be in a separate register since it may be used several
    // times.
    if (type == ValType::I64) {
      *rv = bc->popI64ToI32();
    } else {
      *rv = bc->popI32();
    }
    *rd = bc->specific_.eax;
#  ifdef JS_CODEGEN_X64
    temps->t0 = bc->needI32();
#  endif
  }
}

template <typename T>
static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, T srcAddr,
                    AtomicOp op, RegI32 rv, RegI32 rd, const Temps& temps) {
#  ifdef JS_CODEGEN_X64
  RegI32 temp = temps.t0;
#  else
  // x86: only the cmpxchg-loop ops need a temp; supply the scratch for those.
  RegI32 temp;
  ScratchI32 scratch(*bc);
  if (op != AtomicOp::Add && op != AtomicOp::Sub) {
    temp = scratch;
  }
#  endif
  bc->masm.wasmAtomicFetchOp(access, op, rv, srcAddr, temp, rd);
}

static void Deallocate(BaseCompiler* bc, RegI32 rv, const Temps& temps) {
  // In the xadd case rv == rd == eax, which stays live as the result.
  if (rv != bc->specific_.eax) {
    bc->freeI32(rv);
  }
#  ifdef JS_CODEGEN_X64
  bc->maybeFree(temps.t0);
#  endif
}

#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64)

struct Temps {
  RegI32 t0;
};

static void PopAndAllocate(BaseCompiler* bc, ValType type,
                           Scalar::Type viewType, AtomicOp op, RegI32* rd,
                           RegI32* rv, Temps* temps) {
  *rv = type == ValType::I64 ? bc->popI64ToI32() : bc->popI32();
  temps->t0 = bc->needI32();
  *rd = bc->needI32();
}

static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
                    Address srcAddr, AtomicOp op, RegI32 rv, RegI32 rd,
                    const Temps& temps) {
  bc->masm.wasmAtomicFetchOp(access, op, rv, srcAddr, temps.t0, rd);
}

static void Deallocate(BaseCompiler* bc, RegI32 rv, const Temps& temps) {
  bc->freeI32(rv);
  bc->freeI32(temps.t0);
}

#elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64)

struct Temps {
  RegI32 t0, t1, t2;
};

static void PopAndAllocate(BaseCompiler* bc, ValType type,
                           Scalar::Type viewType, AtomicOp op, RegI32* rd,
                           RegI32* rv, Temps* temps) {
  *rv = type == ValType::I64 ? bc->popI64ToI32() : bc->popI32();
  if (type == ValType::I64) {
    // Architecture-specific i64-to-i32.
    bc->masm.move64To32(Register64(*rv), *rv);
  }
  // Sub-word accesses need three temps for the masked RMW sequence.
  if (Scalar::byteSize(viewType) < 4) {
    temps->t0 = bc->needI32();
    temps->t1 = bc->needI32();
    temps->t2 = bc->needI32();
  }
  *rd = bc->needI32();
}

static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
                    Address srcAddr, AtomicOp op, RegI32 rv, RegI32 rd,
                    const Temps& temps) {
  bc->masm.wasmAtomicFetchOp(access, op, rv, srcAddr, temps.t0, temps.t1,
                             temps.t2, rd);
}

static void Deallocate(BaseCompiler* bc, RegI32 rv, const Temps& temps) {
  bc->freeI32(rv);
  bc->maybeFree(temps.t0);
  bc->maybeFree(temps.t1);
  bc->maybeFree(temps.t2);
}

#elif defined(JS_CODEGEN_RISCV64)

struct Temps {
  RegI32 t0, t1, t2;
};

static void PopAndAllocate(BaseCompiler* bc, ValType type,
                           Scalar::Type viewType, AtomicOp op, RegI32* rd,
                           RegI32* rv, Temps* temps) {
  *rv = type == ValType::I64 ? bc->popI64ToI32() : bc->popI32();
  // Sub-word accesses need three temps for the masked RMW sequence.
  if (Scalar::byteSize(viewType) < 4) {
    temps->t0 = bc->needI32();
    temps->t1 = bc->needI32();
    temps->t2 = bc->needI32();
  }
  *rd = bc->needI32();
}

static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
                    Address srcAddr, AtomicOp op, RegI32 rv, RegI32 rd,
                    const Temps& temps) {
  bc->masm.wasmAtomicFetchOp(access, op, rv, srcAddr, temps.t0, temps.t1,
                             temps.t2, rd);
}

static void Deallocate(BaseCompiler* bc, RegI32 rv, const Temps& temps) {
  bc->freeI32(rv);
  bc->maybeFree(temps.t0);
  bc->maybeFree(temps.t1);
  bc->maybeFree(temps.t2);
}

#elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32)

// No codegen on these configurations; empty stubs keep the build working.
using Temps = Nothing;

static void PopAndAllocate(BaseCompiler*, ValType, Scalar::Type, AtomicOp,
                           RegI32*, RegI32*, Temps*) {}

static void Perform(BaseCompiler*, const MemoryAccessDesc&, Address, AtomicOp,
                    RegI32, RegI32, const Temps&) {}

static void Deallocate(BaseCompiler*, RegI32, const Temps&) {}

#endif

}  // namespace atomic_rmw32
   1377 
// Framework for a 32-bit-and-narrower atomic RMW: allocate registers via the
// platform helpers, pop the address, prepare the atomic access, perform the
// op, and push the old value (zero-extended to i64 if the wasm type is i64).
template <typename RegAddressType>
void BaseCompiler::atomicRMW32(MemoryAccessDesc* access, ValType type,
                               AtomicOp op) {
  Scalar::Type viewType = access->type();
  RegI32 rd, rv;
  atomic_rmw32::Temps temps;
  atomic_rmw32::PopAndAllocate(this, type, viewType, op, &rd, &rv, &temps);

  AccessCheck check;
  RegAddressType rp = popMemoryAccess<RegAddressType>(access, &check);
  RegPtr instance = maybeLoadInstanceForAccess(access, check);

  auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp);
  atomic_rmw32::Perform(this, *access, memaddr, op, rv, rd, temps);

#ifndef RABALDR_PIN_INSTANCE
  maybeFree(instance);
#endif
  atomic_rmw32::Deallocate(this, rv, temps);
  free(rp);

  if (type == ValType::I64) {
    // Sub-64-bit RMW on an i64 operand returns the old value zero-extended.
    pushU32AsI64(rd);
  } else {
    pushI32(rd);
  }
}
   1405 
// Per-platform helpers for 64-bit atomic RMW.  Same structure as
// atomic_rmw32; x86 additionally has a Setup step because the result pair can
// only be assigned after the address has been popped (see comments below).
namespace atomic_rmw64 {

#if defined(JS_CODEGEN_X64)

static void PopAndAllocate(BaseCompiler* bc, AtomicOp op, RegI64* rd,
                           RegI64* rv, RegI64* temp) {
  if (op == AtomicOp::Add || op == AtomicOp::Sub) {
    // We use xaddq, so input and output must be the same register.
    *rv = bc->popI64();
    *rd = *rv;
  } else {
    // We use a cmpxchgq loop, so the output must be rax and we need a temp.
    bc->needI64(bc->specific_.rax);
    *rd = bc->specific_.rax;
    *rv = bc->popI64();
    *temp = bc->needI64();
  }
}

static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
                    Address srcAddr, AtomicOp op, RegI64 rv, RegI64 temp,
                    RegI64 rd) {
  bc->masm.wasmAtomicFetchOp64(access, op, rv, srcAddr, temp, rd);
}

static void Deallocate(BaseCompiler* bc, AtomicOp op, RegI64 rv, RegI64 temp) {
  bc->maybeFree(temp);
  // In the xaddq case rv == rd, which stays live as the result.
  if (op != AtomicOp::Add && op != AtomicOp::Sub) {
    bc->freeI64(rv);
  }
}

#elif defined(JS_CODEGEN_X86)

// Register allocation is tricky, see comments at atomic_xchg64 below.
//
// - Initially rv=ecx:edx and eax is reserved, rd=unallocated.
// - Then rp is popped into esi+edi because those are the only available.
// - The Setup operation makes rd=edx:eax.
// - Deallocation then frees only the ecx part of rv.
//
// The temp is unused here.

static void PopAndAllocate(BaseCompiler* bc, AtomicOp op, RegI64* rd,
                           RegI64* rv, RegI64*) {
  bc->needI32(bc->specific_.eax);
  bc->needI32(bc->specific_.ecx);
  bc->needI32(bc->specific_.edx);
  *rv = RegI64(Register64(bc->specific_.ecx, bc->specific_.edx));
  bc->popI64ToSpecific(*rv);
}

static void Setup(BaseCompiler* bc, RegI64* rd) { *rd = bc->specific_.edx_eax; }

static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
                    Address srcAddr, AtomicOp op, RegI64 rv, RegI64, RegI64 rd,
                    const ScratchAtomicNoHeapReg& scratch) {
  MOZ_ASSERT(rv.high == bc->specific_.ecx);
  MOZ_ASSERT(Register(scratch) == js::jit::ebx);

  // Spill the operand to the stack: the masm's 64-bit fetch-op takes it as a
  // memory operand because no register pair is free for it.
  bc->fr.pushGPR(rv.high);
  bc->fr.pushGPR(rv.low);
  Address value(StackPointer, 0);

  bc->masm.wasmAtomicFetchOp64(access, op, value, srcAddr,
                               bc->specific_.ecx_ebx, rd);

  bc->fr.popBytes(8);
}

static void Deallocate(BaseCompiler* bc, AtomicOp, RegI64, RegI64) {
  // edx:eax remain live as the result; only ecx is released here.
  bc->freeI32(bc->specific_.ecx);
}

#elif defined(JS_CODEGEN_ARM)

static void PopAndAllocate(BaseCompiler* bc, AtomicOp op, RegI64* rd,
                           RegI64* rv, RegI64* temp) {
  // We use a ldrex/strexd loop so the temp and the output must be
  // odd/even pairs.
  *rv = bc->popI64();
  *temp = bc->needI64Pair();
  *rd = bc->needI64Pair();
}

static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
                    Address srcAddr, AtomicOp op, RegI64 rv, RegI64 temp,
                    RegI64 rd) {
  bc->masm.wasmAtomicFetchOp64(access, op, rv, srcAddr, temp, rd);
}

static void Deallocate(BaseCompiler* bc, AtomicOp op, RegI64 rv, RegI64 temp) {
  bc->freeI64(rv);
  bc->freeI64(temp);
}

#elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS64) || \
    defined(JS_CODEGEN_LOONG64)

static void PopAndAllocate(BaseCompiler* bc, AtomicOp op, RegI64* rd,
                           RegI64* rv, RegI64* temp) {
  *rv = bc->popI64();
  *temp = bc->needI64();
  *rd = bc->needI64();
}

static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
                    Address srcAddr, AtomicOp op, RegI64 rv, RegI64 temp,
                    RegI64 rd) {
  bc->masm.wasmAtomicFetchOp64(access, op, rv, srcAddr, temp, rd);
}

static void Deallocate(BaseCompiler* bc, AtomicOp op, RegI64 rv, RegI64 temp) {
  bc->freeI64(rv);
  bc->freeI64(temp);
}
#elif defined(JS_CODEGEN_RISCV64)

static void PopAndAllocate(BaseCompiler* bc, AtomicOp op, RegI64* rd,
                           RegI64* rv, RegI64* temp) {
  *rv = bc->popI64();
  *temp = bc->needI64();
  *rd = bc->needI64();
}

static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
                    Address srcAddr, AtomicOp op, RegI64 rv, RegI64 temp,
                    RegI64 rd) {
  bc->masm.wasmAtomicFetchOp64(access, op, rv, srcAddr, temp, rd);
}

static void Deallocate(BaseCompiler* bc, AtomicOp op, RegI64 rv, RegI64 temp) {
  bc->freeI64(rv);
  bc->freeI64(temp);
}

#elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32)

// No codegen on these configurations; empty stubs keep the build working.
static void PopAndAllocate(BaseCompiler*, AtomicOp, RegI64*, RegI64*, RegI64*) {
}

static void Perform(BaseCompiler*, const MemoryAccessDesc&, Address,
                    AtomicOp op, RegI64, RegI64, RegI64) {}

static void Deallocate(BaseCompiler*, AtomicOp, RegI64, RegI64) {}

#endif

}  // namespace atomic_rmw64
   1555 
// Framework for a 64-bit atomic RMW: allocate registers via the platform
// helpers, pop the address, prepare the atomic access, perform the op, and
// push the old value.  Platforms without a pinned heap register (x86) route
// the instance through the dedicated atomic scratch and fix up the result
// register with Setup only after the address has been popped.
template <typename RegAddressType>
void BaseCompiler::atomicRMW64(MemoryAccessDesc* access, ValType type,
                               AtomicOp op) {
  RegI64 rd, rv, temp;
  atomic_rmw64::PopAndAllocate(this, op, &rd, &rv, &temp);

  AccessCheck check;
  RegAddressType rp = popMemoryAccess<RegAddressType>(access, &check);

#if defined(WASM_HAS_HEAPREG)
  RegPtr instance = maybeLoadInstanceForAccess(access, check);
  auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp);
  atomic_rmw64::Perform(this, *access, memaddr, op, rv, temp, rd);
#  ifndef RABALDR_PIN_INSTANCE
  maybeFree(instance);
#  endif
#else
  ScratchAtomicNoHeapReg scratch(*this);
  RegPtr instance =
      maybeLoadInstanceForAccess(access, check, RegIntptrToRegPtr(scratch));
  auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp);
  atomic_rmw64::Setup(this, &rd);
  atomic_rmw64::Perform(this, *access, memaddr, op, rv, temp, rd, scratch);
  MOZ_ASSERT(instance == scratch);
#endif

  free(rp);
  atomic_rmw64::Deallocate(this, op, rv, temp);

  pushI64(rd);
}
   1587 
   1588 //////////////////////////////////////////////////////////////////////////////
   1589 //
   1590 // Atomic exchange (also used for atomic store in some cases).
   1591 
   1592 void BaseCompiler::atomicXchg(MemoryAccessDesc* access, ValType type) {
   1593  Scalar::Type viewType = access->type();
   1594  if (Scalar::byteSize(viewType) <= 4) {
   1595    if (isMem32(access->memoryIndex())) {
   1596      atomicXchg32<RegI32>(access, type);
   1597    } else {
   1598      atomicXchg32<RegI64>(access, type);
   1599    }
   1600  } else {
   1601    MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8);
   1602    if (isMem32(access->memoryIndex())) {
   1603      atomicXchg64<RegI32>(access, WantResult(true));
   1604    } else {
   1605      atomicXchg64<RegI64>(access, WantResult(true));
   1606    }
   1607  }
   1608 }
   1609 
namespace atomic_xchg32 {

// Platform-specific helpers for 32-bit-and-narrower atomic exchange:
// PopAndAllocate pops the replacement value rv and allocates the result rd
// and any temps; Perform emits the exchange with the result in rd; and
// Deallocate releases what PopAndAllocate acquired (rd is pushed by the
// caller, not freed here).

#if defined(JS_CODEGEN_X64)

// No temp registers are needed on x64.
using Temps = Nothing;

static void PopAndAllocate(BaseCompiler* bc, ValType type,
                           Scalar::Type viewType, RegI32* rd, RegI32* rv,
                           Temps*) {
  // The xchg instruction reuses rv as rd.
  *rv = (type == ValType::I64) ? bc->popI64ToI32() : bc->popI32();
  *rd = *rv;
}

static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
                    Address srcAddr, RegI32 rv, RegI32 rd, const Temps&) {
  bc->masm.wasmAtomicExchange(access, srcAddr, rv, rd);
}

// Nothing to free: rv aliases rd, which the caller pushes as the result.
static void Deallocate(BaseCompiler* bc, RegI32, const Temps&) {}

#elif defined(JS_CODEGEN_X86)

// No temp registers are needed on x86 (a ScratchI8 is used locally when the
// output needs a byte persona).
using Temps = Nothing;

static void PopAndAllocate(BaseCompiler* bc, ValType type,
                           Scalar::Type viewType, RegI32* rd, RegI32* rv,
                           Temps*) {
  // The xchg instruction reuses rv as rd.
  *rv = (type == ValType::I64) ? bc->popI64ToI32() : bc->popI32();
  *rd = *rv;
}

static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
                    Address srcAddr, RegI32 rv, RegI32 rd, const Temps&) {
  if (access.type() == Scalar::Uint8 && !bc->ra.isSingleByteI32(rd)) {
    ScratchI8 scratch(*bc);
    // The output register must have a byte persona.
    bc->masm.wasmAtomicExchange(access, srcAddr, rv, scratch);
    bc->masm.movl(scratch, rd);
  } else {
    bc->masm.wasmAtomicExchange(access, srcAddr, rv, rd);
  }
}

// Nothing to free: rv aliases rd, which the caller pushes as the result.
static void Deallocate(BaseCompiler* bc, RegI32, const Temps&) {}

#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64)

// No temp registers are needed on ARM/ARM64.
using Temps = Nothing;

static void PopAndAllocate(BaseCompiler* bc, ValType type,
                           Scalar::Type viewType, RegI32* rd, RegI32* rv,
                           Temps*) {
  *rv = (type == ValType::I64) ? bc->popI64ToI32() : bc->popI32();
  *rd = bc->needI32();
}

static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
                    Address srcAddr, RegI32 rv, RegI32 rd, const Temps&) {
  bc->masm.wasmAtomicExchange(access, srcAddr, rv, rd);
}

static void Deallocate(BaseCompiler* bc, RegI32 rv, const Temps&) {
  bc->freeI32(rv);
}

#elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64)

struct Temps {
  RegI32 t0, t1, t2;
};

static void PopAndAllocate(BaseCompiler* bc, ValType type,
                           Scalar::Type viewType, RegI32* rd, RegI32* rv,
                           Temps* temps) {
  *rv = (type == ValType::I64) ? bc->popI64ToI32() : bc->popI32();
  if (type == ValType::I64) {
    // Architecture-specific i64-to-i32.
    bc->masm.move64To32(Register64(*rv), *rv);
  }
  // Sub-word accesses require three temps, consumed by wasmAtomicExchange.
  if (Scalar::byteSize(viewType) < 4) {
    temps->t0 = bc->needI32();
    temps->t1 = bc->needI32();
    temps->t2 = bc->needI32();
  }
  *rd = bc->needI32();
}

static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
                    Address srcAddr, RegI32 rv, RegI32 rd, const Temps& temps) {
  bc->masm.wasmAtomicExchange(access, srcAddr, rv, temps.t0, temps.t1, temps.t2,
                              rd);
}

static void Deallocate(BaseCompiler* bc, RegI32 rv, const Temps& temps) {
  bc->freeI32(rv);
  // The temps are allocated only for sub-word accesses, hence maybeFree.
  bc->maybeFree(temps.t0);
  bc->maybeFree(temps.t1);
  bc->maybeFree(temps.t2);
}

#elif defined(JS_CODEGEN_RISCV64)

struct Temps {
  RegI32 t0, t1, t2;
};

static void PopAndAllocate(BaseCompiler* bc, ValType type,
                           Scalar::Type viewType, RegI32* rd, RegI32* rv,
                           Temps* temps) {
  *rv = (type == ValType::I64) ? bc->popI64ToI32() : bc->popI32();
  // Sub-word accesses require three temps, consumed by wasmAtomicExchange.
  if (Scalar::byteSize(viewType) < 4) {
    temps->t0 = bc->needI32();
    temps->t1 = bc->needI32();
    temps->t2 = bc->needI32();
  }
  *rd = bc->needI32();
}

static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
                    Address srcAddr, RegI32 rv, RegI32 rd, const Temps& temps) {
  bc->masm.wasmAtomicExchange(access, srcAddr, rv, temps.t0, temps.t1, temps.t2,
                              rd);
}

static void Deallocate(BaseCompiler* bc, RegI32 rv, const Temps& temps) {
  bc->freeI32(rv);
  // The temps are allocated only for sub-word accesses, hence maybeFree.
  bc->maybeFree(temps.t0);
  bc->maybeFree(temps.t1);
  bc->maybeFree(temps.t2);
}

#elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32)

// No-op stubs for platforms without a code generator.

using Temps = Nothing;

static void PopAndAllocate(BaseCompiler*, ValType, Scalar::Type, RegI32*,
                           RegI32*, Temps*) {}
static void Perform(BaseCompiler*, const MemoryAccessDesc&, Address, RegI32,
                    RegI32, const Temps&) {}
static void Deallocate(BaseCompiler*, RegI32, const Temps&) {}

#endif

}  // namespace atomic_xchg32
   1756 
// Emit a 32-bit-or-narrower atomic exchange.  RegAddressType is RegI32 for
// 32-bit memories and RegI64 for 64-bit memories.  Pops the replacement
// value and the address, performs the exchange, and pushes the result
// (zero-extended to i64 when the operation was requested at type i64).
template <typename RegAddressType>
void BaseCompiler::atomicXchg32(MemoryAccessDesc* access, ValType type) {
  Scalar::Type viewType = access->type();

  RegI32 rd, rv;
  atomic_xchg32::Temps temps;
  atomic_xchg32::PopAndAllocate(this, type, viewType, &rd, &rv, &temps);

  AccessCheck check;

  RegAddressType rp = popMemoryAccess<RegAddressType>(access, &check);
  RegPtr instance = maybeLoadInstanceForAccess(access, check);

  auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp);
  atomic_xchg32::Perform(this, *access, memaddr, rv, rd, temps);

#ifndef RABALDR_PIN_INSTANCE
  maybeFree(instance);
#endif
  free(rp);
  atomic_xchg32::Deallocate(this, rv, temps);

  if (type == ValType::I64) {
    pushU32AsI64(rd);
  } else {
    pushI32(rd);
  }
}
   1785 
namespace atomic_xchg64 {

// Platform-specific helpers for 64-bit atomic exchange: PopAndAllocate pops
// the replacement value rv and allocates the result rd; Deallocate releases
// what was acquired (rd may be Invalid here if the caller pushed it as the
// result, hence maybeFree).

#if defined(JS_CODEGEN_X64)

static void PopAndAllocate(BaseCompiler* bc, RegI64* rd, RegI64* rv) {
  // The exchange reuses rv as rd.
  *rv = bc->popI64();
  *rd = *rv;
}

static void Deallocate(BaseCompiler* bc, RegI64 rd, RegI64) {
  // rv aliases rd, so only rd (if still live) needs freeing.
  bc->maybeFree(rd);
}

#elif defined(JS_CODEGEN_X86)

// Register allocation is tricky in several ways.
//
// - For a 64-bit access on memory64 we need six registers for rd, rv, and rp,
//   but have only five (as the temp ebx is needed too), so we target all
//   registers explicitly to make sure there's space.
//
// - We'll be using cmpxchg8b, and when we do the operation, rv must be in
//   ecx:ebx, and rd must be edx:eax.  We can't use ebx for rv initially because
//   we need ebx for a scratch also, so use a separate temp and move the value
//   to ebx just before the operation.
//
// In sum:
//
// - Initially rv=ecx:edx and eax is reserved, rd=unallocated.
// - Then rp is popped into esi+edi because those are the only available.
// - The Setup operation makes rv=ecx:ebx and rd=edx:eax and moves edx->ebx.
// - Deallocation then frees only the ecx part of rv.

static void PopAndAllocate(BaseCompiler* bc, RegI64* rd, RegI64* rv) {
  bc->needI32(bc->specific_.ecx);
  bc->needI32(bc->specific_.edx);
  bc->needI32(bc->specific_.eax);
  *rv = RegI64(Register64(bc->specific_.ecx, bc->specific_.edx));
  bc->popI64ToSpecific(*rv);
}

// Retarget rv to ecx:ebx and rd to edx:eax as required by cmpxchg8b; the
// scratch register is ebx (asserted below).
static void Setup(BaseCompiler* bc, RegI64* rv, RegI64* rd,
                  const ScratchAtomicNoHeapReg& scratch) {
  MOZ_ASSERT(rv->high == bc->specific_.ecx);
  MOZ_ASSERT(Register(scratch) == js::jit::ebx);
  bc->masm.move32(rv->low, scratch);
  *rv = bc->specific_.ecx_ebx;
  *rd = bc->specific_.edx_eax;
}

static void Deallocate(BaseCompiler* bc, RegI64 rd, RegI64 rv) {
  MOZ_ASSERT(rd == bc->specific_.edx_eax || rd == RegI64::Invalid());
  bc->maybeFree(rd);
  // Only ecx is still allocated from rv; ebx is owned by the scratch.
  bc->freeI32(bc->specific_.ecx);
}

#elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS64) || \
    defined(JS_CODEGEN_LOONG64)

static void PopAndAllocate(BaseCompiler* bc, RegI64* rd, RegI64* rv) {
  *rv = bc->popI64();
  *rd = bc->needI64();
}

static void Deallocate(BaseCompiler* bc, RegI64 rd, RegI64 rv) {
  bc->freeI64(rv);
  bc->maybeFree(rd);
}

#elif defined(JS_CODEGEN_ARM)

static void PopAndAllocate(BaseCompiler* bc, RegI64* rd, RegI64* rv) {
  // Both rv and rd must be odd/even pairs.
  *rv = bc->popI64ToSpecific(bc->needI64Pair());
  *rd = bc->needI64Pair();
}

static void Deallocate(BaseCompiler* bc, RegI64 rd, RegI64 rv) {
  bc->freeI64(rv);
  bc->maybeFree(rd);
}

#elif defined(JS_CODEGEN_RISCV64)

static void PopAndAllocate(BaseCompiler* bc, RegI64* rd, RegI64* rv) {
  *rv = bc->popI64();
  *rd = bc->needI64();
}

static void Deallocate(BaseCompiler* bc, RegI64 rd, RegI64 rv) {
  bc->freeI64(rv);
  bc->maybeFree(rd);
}
#elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32)

// No-op stubs for platforms without a code generator.

static void PopAndAllocate(BaseCompiler*, RegI64*, RegI64*) {}
static void Deallocate(BaseCompiler*, RegI64, RegI64) {}

#endif

}  // namespace atomic_xchg64
   1887 
// Emit a 64-bit atomic exchange.  RegAddressType is RegI32 for 32-bit
// memories and RegI64 for 64-bit memories.  When wantResult is false the
// old value is discarded (this path also serves atomic store, per the
// section comment above).
template <typename RegAddressType>
void BaseCompiler::atomicXchg64(MemoryAccessDesc* access,
                                WantResult wantResult) {
  RegI64 rd, rv;
  atomic_xchg64::PopAndAllocate(this, &rd, &rv);

  AccessCheck check;
  RegAddressType rp = popMemoryAccess<RegAddressType>(access, &check);

#ifdef WASM_HAS_HEAPREG
  RegPtr instance = maybeLoadInstanceForAccess(access, check);
  auto memaddr =
      prepareAtomicMemoryAccess<RegAddressType>(access, &check, instance, rp);
  masm.wasmAtomicExchange64(*access, memaddr, rv, rd);
#  ifndef RABALDR_PIN_INSTANCE
  maybeFree(instance);
#  endif
#else
  // No heap register: the instance occupies the atomic scratch register, and
  // Setup retargets rv/rd to the registers the platform requires.
  ScratchAtomicNoHeapReg scratch(*this);
  RegPtr instance =
      maybeLoadInstanceForAccess(access, check, RegIntptrToRegPtr(scratch));
  Address memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp);
  atomic_xchg64::Setup(this, &rv, &rd, scratch);
  masm.wasmAtomicExchange64(*access, memaddr, rv, rd);
  MOZ_ASSERT(instance == scratch);
#endif

  free(rp);
  if (wantResult) {
    pushI64(rd);
    // rd now belongs to the value stack; invalidate it so Deallocate's
    // maybeFree does not release it.
    rd = RegI64::Invalid();
  }
  atomic_xchg64::Deallocate(this, rd, rv);
}
   1922 
   1923 //////////////////////////////////////////////////////////////////////////////
   1924 //
   1925 // Atomic compare-exchange.
   1926 
   1927 void BaseCompiler::atomicCmpXchg(MemoryAccessDesc* access, ValType type) {
   1928  Scalar::Type viewType = access->type();
   1929  if (Scalar::byteSize(viewType) <= 4) {
   1930    if (isMem32(access->memoryIndex())) {
   1931      atomicCmpXchg32<RegI32>(access, type);
   1932    } else {
   1933      atomicCmpXchg32<RegI64>(access, type);
   1934    }
   1935  } else {
   1936    MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8);
   1937    if (isMem32(access->memoryIndex())) {
   1938      atomicCmpXchg64<RegI32>(access, type);
   1939    } else {
   1940      atomicCmpXchg64<RegI64>(access, type);
   1941    }
   1942  }
   1943 }
   1944 
   1945 namespace atomic_cmpxchg32 {
   1946 
   1947 #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
   1948 
   1949 using Temps = Nothing;
   1950 
   1951 static void PopAndAllocate(BaseCompiler* bc, ValType type,
   1952                           Scalar::Type viewType, RegI32* rexpect, RegI32* rnew,
   1953                           RegI32* rd, Temps*) {
   1954  // For cmpxchg, the expected value and the result are both in eax.
   1955  bc->needI32(bc->specific_.eax);
   1956  if (type == ValType::I64) {
   1957    *rnew = bc->popI64ToI32();
   1958    *rexpect = bc->popI64ToSpecificI32(bc->specific_.eax);
   1959  } else {
   1960    *rnew = bc->popI32();
   1961    *rexpect = bc->popI32ToSpecific(bc->specific_.eax);
   1962  }
   1963  *rd = *rexpect;
   1964 }
   1965 
   1966 template <typename T>
   1967 static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, T srcAddr,
   1968                    RegI32 rexpect, RegI32 rnew, RegI32 rd, const Temps&) {
   1969 #  if defined(JS_CODEGEN_X86)
   1970  ScratchI8 scratch(*bc);
   1971  if (access.type() == Scalar::Uint8) {
   1972    MOZ_ASSERT(rd == bc->specific_.eax);
   1973    if (!bc->ra.isSingleByteI32(rnew)) {
   1974      // The replacement value must have a byte persona.
   1975      bc->masm.movl(rnew, scratch);
   1976      rnew = scratch;
   1977    }
   1978  }
   1979 #  endif
   1980  bc->masm.wasmCompareExchange(access, srcAddr, rexpect, rnew, rd);
   1981 }
   1982 
   1983 static void Deallocate(BaseCompiler* bc, RegI32, RegI32 rnew, const Temps&) {
   1984  bc->freeI32(rnew);
   1985 }
   1986 
   1987 #elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64)
   1988 
   1989 using Temps = Nothing;
   1990 
   1991 static void PopAndAllocate(BaseCompiler* bc, ValType type,
   1992                           Scalar::Type viewType, RegI32* rexpect, RegI32* rnew,
   1993                           RegI32* rd, Temps*) {
   1994  if (type == ValType::I64) {
   1995    *rnew = bc->popI64ToI32();
   1996    *rexpect = bc->popI64ToI32();
   1997  } else {
   1998    *rnew = bc->popI32();
   1999    *rexpect = bc->popI32();
   2000  }
   2001  *rd = bc->needI32();
   2002 }
   2003 
   2004 static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
   2005                    Address srcAddr, RegI32 rexpect, RegI32 rnew, RegI32 rd,
   2006                    const Temps&) {
   2007  bc->masm.wasmCompareExchange(access, srcAddr, rexpect, rnew, rd);
   2008 }
   2009 
   2010 static void Deallocate(BaseCompiler* bc, RegI32 rexpect, RegI32 rnew,
   2011                       const Temps&) {
   2012  bc->freeI32(rnew);
   2013  bc->freeI32(rexpect);
   2014 }
   2015 
   2016 #elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64)
   2017 
   2018 struct Temps {
   2019  RegI32 t0, t1, t2;
   2020 };
   2021 
   2022 static void PopAndAllocate(BaseCompiler* bc, ValType type,
   2023                           Scalar::Type viewType, RegI32* rexpect, RegI32* rnew,
   2024                           RegI32* rd, Temps* temps) {
   2025  if (type == ValType::I64) {
   2026    *rnew = bc->popI64ToI32();
   2027    *rexpect = bc->popI64ToI32();
   2028    // Architecture-specific i64-to-i32.
   2029    bc->masm.move64To32(Register64(*rexpect), *rexpect);
   2030  } else {
   2031    *rnew = bc->popI32();
   2032    *rexpect = bc->popI32();
   2033  }
   2034  if (Scalar::byteSize(viewType) < 4) {
   2035    temps->t0 = bc->needI32();
   2036    temps->t1 = bc->needI32();
   2037    temps->t2 = bc->needI32();
   2038  }
   2039  *rd = bc->needI32();
   2040 }
   2041 
   2042 static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
   2043                    Address srcAddr, RegI32 rexpect, RegI32 rnew, RegI32 rd,
   2044                    const Temps& temps) {
   2045  bc->masm.wasmCompareExchange(access, srcAddr, rexpect, rnew, temps.t0,
   2046                               temps.t1, temps.t2, rd);
   2047 }
   2048 
   2049 static void Deallocate(BaseCompiler* bc, RegI32 rexpect, RegI32 rnew,
   2050                       const Temps& temps) {
   2051  bc->freeI32(rnew);
   2052  bc->freeI32(rexpect);
   2053  bc->maybeFree(temps.t0);
   2054  bc->maybeFree(temps.t1);
   2055  bc->maybeFree(temps.t2);
   2056 }
   2057 
   2058 #elif defined(JS_CODEGEN_RISCV64)
   2059 
   2060 struct Temps {
   2061  RegI32 t0, t1, t2;
   2062 };
   2063 
   2064 static void PopAndAllocate(BaseCompiler* bc, ValType type,
   2065                           Scalar::Type viewType, RegI32* rexpect, RegI32* rnew,
   2066                           RegI32* rd, Temps* temps) {
   2067  if (type == ValType::I64) {
   2068    *rnew = bc->popI64ToI32();
   2069    *rexpect = bc->popI64ToI32();
   2070  } else {
   2071    *rnew = bc->popI32();
   2072    *rexpect = bc->popI32();
   2073  }
   2074  if (Scalar::byteSize(viewType) < 4) {
   2075    temps->t0 = bc->needI32();
   2076    temps->t1 = bc->needI32();
   2077    temps->t2 = bc->needI32();
   2078  }
   2079  *rd = bc->needI32();
   2080 }
   2081 
   2082 static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
   2083                    Address srcAddr, RegI32 rexpect, RegI32 rnew, RegI32 rd,
   2084                    const Temps& temps) {
   2085  bc->masm.wasmCompareExchange(access, srcAddr, rexpect, rnew, temps.t0,
   2086                               temps.t1, temps.t2, rd);
   2087 }
   2088 
   2089 static void Deallocate(BaseCompiler* bc, RegI32 rexpect, RegI32 rnew,
   2090                       const Temps& temps) {
   2091  bc->freeI32(rnew);
   2092  bc->freeI32(rexpect);
   2093  bc->maybeFree(temps.t0);
   2094  bc->maybeFree(temps.t1);
   2095  bc->maybeFree(temps.t2);
   2096 }
   2097 
   2098 #elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32)
   2099 
   2100 using Temps = Nothing;
   2101 
   2102 static void PopAndAllocate(BaseCompiler*, ValType, Scalar::Type, RegI32*,
   2103                           RegI32*, RegI32*, Temps*) {}
   2104 
   2105 static void Perform(BaseCompiler*, const MemoryAccessDesc&, Address, RegI32,
   2106                    RegI32, RegI32, const Temps& temps) {}
   2107 
   2108 static void Deallocate(BaseCompiler*, RegI32, RegI32, const Temps&) {}
   2109 
   2110 #endif
   2111 
   2112 }  // namespace atomic_cmpxchg32
   2113 
// Emit a 32-bit-or-narrower atomic compare-exchange.  RegAddressType is
// RegI32 for 32-bit memories and RegI64 for 64-bit memories.  Pops the
// replacement value, the expected value, and the address, and pushes the
// value read from memory (zero-extended to i64 when requested at type i64).
template <typename RegAddressType>
void BaseCompiler::atomicCmpXchg32(MemoryAccessDesc* access, ValType type) {
  Scalar::Type viewType = access->type();
  RegI32 rexpect, rnew, rd;
  atomic_cmpxchg32::Temps temps;
  atomic_cmpxchg32::PopAndAllocate(this, type, viewType, &rexpect, &rnew, &rd,
                                   &temps);

  AccessCheck check;
  RegAddressType rp = popMemoryAccess<RegAddressType>(access, &check);
  RegPtr instance = maybeLoadInstanceForAccess(access, check);

  auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp);
  atomic_cmpxchg32::Perform(this, *access, memaddr, rexpect, rnew, rd, temps);

#ifndef RABALDR_PIN_INSTANCE
  maybeFree(instance);
#endif
  free(rp);
  atomic_cmpxchg32::Deallocate(this, rexpect, rnew, temps);

  if (type == ValType::I64) {
    pushU32AsI64(rd);
  } else {
    pushI32(rd);
  }
}
   2141 
   2142 namespace atomic_cmpxchg64 {
   2143 
   2144 // The templates are needed for x86 code generation, which needs complicated
   2145 // register allocation for memory64.
   2146 
   2147 template <typename RegAddressType>
   2148 static void PopAndAllocate(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew,
   2149                           RegI64* rd);
   2150 
   2151 template <typename RegAddressType>
   2152 static void Deallocate(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew);
   2153 
   2154 #if defined(JS_CODEGEN_X64)
   2155 
   2156 template <typename RegAddressType>
   2157 static void PopAndAllocate(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew,
   2158                           RegI64* rd) {
   2159  // For cmpxchg, the expected value and the result are both in rax.
   2160  bc->needI64(bc->specific_.rax);
   2161  *rnew = bc->popI64();
   2162  *rexpect = bc->popI64ToSpecific(bc->specific_.rax);
   2163  *rd = *rexpect;
   2164 }
   2165 
   2166 static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
   2167                    Address srcAddr, RegI64 rexpect, RegI64 rnew, RegI64 rd) {
   2168  bc->masm.wasmCompareExchange64(access, srcAddr, rexpect, rnew, rd);
   2169 }
   2170 
   2171 template <typename RegAddressType>
   2172 static void Deallocate(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew) {
   2173  bc->freeI64(rnew);
   2174 }
   2175 
   2176 #elif defined(JS_CODEGEN_X86)
   2177 
   2178 template <typename RegAddressType>
   2179 static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
   2180                    Address srcAddr, RegI64 rexpect, RegI64 rnew, RegI64 rd,
   2181                    ScratchAtomicNoHeapReg& scratch);
   2182 
   2183 // Memory32: For cmpxchg8b, the expected value and the result are both in
   2184 // edx:eax, and the replacement value is in ecx:ebx.  But we can't allocate ebx
   2185 // initially because we need it later for a scratch, so instead we allocate a
   2186 // temp to hold the low word of 'new'.
   2187 
   2188 template <>
   2189 void PopAndAllocate<RegI32>(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew,
   2190                            RegI64* rd) {
   2191  bc->needI64(bc->specific_.edx_eax);
   2192  bc->needI32(bc->specific_.ecx);
   2193  RegI32 tmp = bc->needI32();
   2194  *rnew = bc->popI64ToSpecific(RegI64(Register64(bc->specific_.ecx, tmp)));
   2195  *rexpect = bc->popI64ToSpecific(bc->specific_.edx_eax);
   2196  *rd = *rexpect;
   2197 }
   2198 
   2199 template <>
   2200 void Perform<RegI32>(BaseCompiler* bc, const MemoryAccessDesc& access,
   2201                     Address srcAddr, RegI64 rexpect, RegI64 rnew, RegI64 rd,
   2202                     ScratchAtomicNoHeapReg& scratch) {
   2203  MOZ_ASSERT(Register(scratch) == js::jit::ebx);
   2204  MOZ_ASSERT(rnew.high == bc->specific_.ecx);
   2205  bc->masm.move32(rnew.low, ebx);
   2206  bc->masm.wasmCompareExchange64(access, srcAddr, rexpect,
   2207                                 bc->specific_.ecx_ebx, rd);
   2208 }
   2209 
   2210 template <>
   2211 void Deallocate<RegI32>(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew) {
   2212  bc->freeI64(rnew);
   2213 }
   2214 
   2215 // Memory64: Register allocation is particularly hairy here.  With memory64, we
   2216 // have up to seven live values: i64 expected-value, i64 new-value, i64 pointer,
   2217 // and instance.  The instance can use the scratch but there's no avoiding that
   2218 // we'll run out of registers.
   2219 //
   2220 // Unlike for the rmw ops, we can't use edx as the rnew.low since it's used
   2221 // for the rexpect.high.  And we can't push anything onto the stack while we're
   2222 // popping the memory address because the memory address may be on the stack.
   2223 
   2224 template <>
   2225 void PopAndAllocate<RegI64>(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew,
   2226                            RegI64* rd) {
   2227  // We reserve these (and ebx).  The 64-bit pointer will end up in esi+edi.
   2228  bc->needI32(bc->specific_.eax);
   2229  bc->needI32(bc->specific_.ecx);
   2230  bc->needI32(bc->specific_.edx);
   2231 
   2232  // Pop the 'new' value and stash it in the instance scratch area.  Do not
   2233  // initialize *rnew to anything.
   2234  RegI64 tmp(Register64(bc->specific_.ecx, bc->specific_.edx));
   2235  bc->popI64ToSpecific(tmp);
   2236  {
   2237    ScratchPtr instanceScratch(*bc);
   2238    bc->stashI64(instanceScratch, tmp);
   2239  }
   2240 
   2241  *rexpect = bc->popI64ToSpecific(bc->specific_.edx_eax);
   2242  *rd = *rexpect;
   2243 }
   2244 
   2245 template <>
   2246 void Perform<RegI64>(BaseCompiler* bc, const MemoryAccessDesc& access,
   2247                     Address srcAddr, RegI64 rexpect, RegI64 rnew, RegI64 rd,
   2248                     ScratchAtomicNoHeapReg& scratch) {
   2249  MOZ_ASSERT(rnew.isInvalid());
   2250  rnew = bc->specific_.ecx_ebx;
   2251 
   2252  bc->unstashI64(RegPtr(Register(bc->specific_.ecx)), rnew);
   2253  bc->masm.wasmCompareExchange64(access, srcAddr, rexpect, rnew, rd);
   2254 }
   2255 
   2256 template <>
   2257 void Deallocate<RegI64>(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew) {
   2258  // edx:ebx have been pushed as the result, and the pointer was freed
   2259  // separately in the caller, so just free ecx.
   2260  bc->free(bc->specific_.ecx);
   2261 }
   2262 
   2263 #elif defined(JS_CODEGEN_ARM)
   2264 
   2265 template <typename RegAddressType>
   2266 static void PopAndAllocate(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew,
   2267                           RegI64* rd) {
   2268  // The replacement value and the result must both be odd/even pairs.
   2269  *rnew = bc->popI64Pair();
   2270  *rexpect = bc->popI64();
   2271  *rd = bc->needI64Pair();
   2272 }
   2273 
   2274 static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
   2275                    Address srcAddr, RegI64 rexpect, RegI64 rnew, RegI64 rd) {
   2276  bc->masm.wasmCompareExchange64(access, srcAddr, rexpect, rnew, rd);
   2277 }
   2278 
   2279 template <typename RegAddressType>
   2280 static void Deallocate(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew) {
   2281  bc->freeI64(rexpect);
   2282  bc->freeI64(rnew);
   2283 }
   2284 
   2285 #elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS64) || \
   2286    defined(JS_CODEGEN_LOONG64)
   2287 
   2288 template <typename RegAddressType>
   2289 static void PopAndAllocate(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew,
   2290                           RegI64* rd) {
   2291  *rnew = bc->popI64();
   2292  *rexpect = bc->popI64();
   2293  *rd = bc->needI64();
   2294 }
   2295 
   2296 static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
   2297                    Address srcAddr, RegI64 rexpect, RegI64 rnew, RegI64 rd) {
   2298  bc->masm.wasmCompareExchange64(access, srcAddr, rexpect, rnew, rd);
   2299 }
   2300 
   2301 template <typename RegAddressType>
   2302 static void Deallocate(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew) {
   2303  bc->freeI64(rexpect);
   2304  bc->freeI64(rnew);
   2305 }
   2306 
   2307 #elif defined(JS_CODEGEN_RISCV64)
   2308 
   2309 template <typename RegAddressType>
   2310 static void PopAndAllocate(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew,
   2311                           RegI64* rd) {
   2312  *rnew = bc->popI64();
   2313  *rexpect = bc->popI64();
   2314  *rd = bc->needI64();
   2315 }
   2316 
   2317 static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
   2318                    Address srcAddr, RegI64 rexpect, RegI64 rnew, RegI64 rd) {
   2319  bc->masm.wasmCompareExchange64(access, srcAddr, rexpect, rnew, rd);
   2320 }
   2321 
   2322 template <typename RegAddressType>
   2323 static void Deallocate(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew) {
   2324  bc->freeI64(rexpect);
   2325  bc->freeI64(rnew);
   2326 }
   2327 
   2328 #elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32)
   2329 
   2330 template <typename RegAddressType>
   2331 static void PopAndAllocate(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew,
   2332                           RegI64* rd) {}
   2333 static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
   2334                    Address srcAddr, RegI64 rexpect, RegI64 rnew, RegI64 rd) {}
   2335 template <typename RegAddressType>
   2336 static void Deallocate(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew) {}
   2337 
   2338 #endif
   2339 
   2340 }  // namespace atomic_cmpxchg64
   2341 
// Emit a 64-bit atomic compare-exchange.  The helper calls are parameterized
// on RegAddressType because x86 requires entirely different register
// allocation for memory64 (see the atomic_cmpxchg64 namespace).  Pops the
// replacement value, the expected value, and the address, and pushes the
// value read from memory as an i64.
template <typename RegAddressType>
void BaseCompiler::atomicCmpXchg64(MemoryAccessDesc* access, ValType type) {
  RegI64 rexpect, rnew, rd;
  atomic_cmpxchg64::PopAndAllocate<RegAddressType>(this, &rexpect, &rnew, &rd);

  AccessCheck check;
  RegAddressType rp = popMemoryAccess<RegAddressType>(access, &check);

#ifdef WASM_HAS_HEAPREG
  RegPtr instance = maybeLoadInstanceForAccess(access, check);
  auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp);
  atomic_cmpxchg64::Perform(this, *access, memaddr, rexpect, rnew, rd);
#  ifndef RABALDR_PIN_INSTANCE
  maybeFree(instance);
#  endif
#else
  // No heap register: the instance occupies the atomic scratch register,
  // which Perform may also use for the operation itself.
  ScratchAtomicNoHeapReg scratch(*this);
  RegPtr instance =
      maybeLoadInstanceForAccess(access, check, RegIntptrToRegPtr(scratch));
  Address memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp);
  atomic_cmpxchg64::Perform<RegAddressType>(this, *access, memaddr, rexpect,
                                            rnew, rd, scratch);
  MOZ_ASSERT(instance == scratch);
#endif

  free(rp);
  atomic_cmpxchg64::Deallocate<RegAddressType>(this, rexpect, rnew);

  pushI64(rd);
}
   2372 
   2373 //////////////////////////////////////////////////////////////////////////////
   2374 //
   2375 // Synchronization.
   2376 
// Emit an atomic wait operation.  Pops the timeout and the expected value,
// computes the effective address on the value stack, and delegates the wait
// to an instance call chosen by the value size (`type`) and the memory's
// address width (32- vs 64-bit).  Returns false if emitting the instance
// call fails.
bool BaseCompiler::atomicWait(ValType type, MemoryAccessDesc* access) {
  switch (type.kind()) {
    case ValType::I32: {
      RegI64 timeout = popI64();
      RegI32 val = popI32();

      // Leave the effective address on the value stack for the call below.
      if (isMem32(access->memoryIndex())) {
        computeEffectiveAddress<RegI32>(access);
      } else {
        computeEffectiveAddress<RegI64>(access);
      }

      pushI32(val);
      pushI64(timeout);
      pushI32(access->memoryIndex());

      if (!emitInstanceCall(isMem32(access->memoryIndex()) ? SASigWaitI32M32
                                                           : SASigWaitI32M64)) {
        return false;
      }
      break;
    }
    case ValType::I64: {
      RegI64 timeout = popI64();
      RegI64 val = popI64();

      if (isMem32(access->memoryIndex())) {
        computeEffectiveAddress<RegI32>(access);
      } else {
#ifdef JS_CODEGEN_X86
        // Register-starved x86: park `val` in the instance's scratch stash
        // and free its register pair while the 64-bit effective address is
        // computed.
        {
          ScratchPtr scratch(*this);
          stashI64(scratch, val);
          freeI64(val);
        }
#endif
        computeEffectiveAddress<RegI64>(access);
#ifdef JS_CODEGEN_X86
        // Reload the stashed value into a freshly allocated register pair.
        {
          ScratchPtr scratch(*this);
          val = needI64();
          unstashI64(scratch, val);
        }
#endif
      }

      pushI64(val);
      pushI64(timeout);
      pushI32(access->memoryIndex());

      if (!emitInstanceCall(isMem32(access->memoryIndex()) ? SASigWaitI64M32
                                                           : SASigWaitI64M64)) {
        return false;
      }
      break;
    }
    default:
      MOZ_CRASH();
  }

  return true;
}
   2439 
   2440 bool BaseCompiler::atomicNotify(MemoryAccessDesc* access) {
   2441  RegI32 count = popI32();
   2442 
   2443  if (isMem32(access->memoryIndex())) {
   2444    computeEffectiveAddress<RegI32>(access);
   2445    RegI32 byteOffset = popI32();
   2446    pushI32(byteOffset);
   2447  } else {
   2448    computeEffectiveAddress<RegI64>(access);
   2449    RegI64 byteOffset = popI64();
   2450    pushI64(byteOffset);
   2451  }
   2452 
   2453  pushI32(count);
   2454  pushI32(access->memoryIndex());
   2455  return emitInstanceCall(isMem32(access->memoryIndex()) ? SASigWakeM32
   2456                                                         : SASigWakeM64);
   2457 }
   2458 
   2459 //////////////////////////////////////////////////////////////////////////////
   2460 //
   2461 // Bulk memory.
   2462 
// Inline expansion of memory.copy for memory index 0 on a 32-bit memory,
// used when the copy length is a small constant.  Pops the constant length
// and the src/dest addresses from the value stack and emits straight-line
// loads and stores instead of an instance call.
//
// Strategy: the length is decomposed greedily into the widest transfer
// widths available (V128 where fast unaligned FP access exists, then i64 on
// 64-bit targets, then i32/i16/i8).  All source bytes are first loaded onto
// the value stack from low to high offsets; they are then stored to the
// destination from high to low.  Because every load precedes every store,
// an out-of-bounds source traps before memory is modified; and because the
// first store is the one at the highest offset, an out-of-bounds
// destination traps before any byte is written.  After the first successful
// access on each side, the remaining accesses at lower offsets are known to
// be in bounds, so their bounds checks are omitted.
void BaseCompiler::memCopyInlineM32() {
  MOZ_ASSERT(MaxInlineMemoryCopyLength != 0);

  // This function assumes a memory index of zero.
  uint32_t memoryIndex = 0;
  int32_t signedLength;
  MOZ_ALWAYS_TRUE(popConst(&signedLength));
  uint32_t length = signedLength;
  MOZ_ASSERT(length != 0 && length <= MaxInlineMemoryCopyLength);

  RegI32 src = popI32();
  RegI32 dest = popI32();

  // Compute the number of copies of each width we will need to do.  After
  // the greedy decomposition, numCopies2 and numCopies1 are each 0 or 1.
  size_t remainder = length;
#ifdef ENABLE_WASM_SIMD
  size_t numCopies16 = 0;
  if (MacroAssembler::SupportsFastUnalignedFPAccesses()) {
    numCopies16 = remainder / sizeof(V128);
    remainder %= sizeof(V128);
  }
#endif
#ifdef JS_64BIT
  size_t numCopies8 = remainder / sizeof(uint64_t);
  remainder %= sizeof(uint64_t);
#endif
  size_t numCopies4 = remainder / sizeof(uint32_t);
  remainder %= sizeof(uint32_t);
  size_t numCopies2 = remainder / sizeof(uint16_t);
  remainder %= sizeof(uint16_t);
  size_t numCopies1 = remainder;

  // Load all source bytes onto the value stack from low to high using the
  // widest transfer width we can for the system. We will trap without writing
  // anything if any source byte is out-of-bounds.
  bool omitBoundsCheck = false;
  size_t offset = 0;

#ifdef ENABLE_WASM_SIMD
  for (uint32_t i = 0; i < numCopies16; i++) {
    // Each load consumes an address from the value stack, so push a fresh
    // copy of `src`; the constant `offset` is folded into the access.
    RegI32 temp = needI32();
    moveI32(src, temp);
    pushI32(temp);

    MemoryAccessDesc access(memoryIndex, Scalar::Simd128, 1, offset,
                            trapSiteDesc(), hugeMemoryEnabled(memoryIndex));
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    loadCommon(&access, check, ValType::V128);

    offset += sizeof(V128);
    // The first access checked the lowest offset; subsequent higher-offset
    // loads stay in bounds only because loads go low-to-high after the
    // widest check — each later load re-checks unless omitted here once the
    // first has succeeded.
    omitBoundsCheck = true;
  }
#endif

#ifdef JS_64BIT
  for (uint32_t i = 0; i < numCopies8; i++) {
    RegI32 temp = needI32();
    moveI32(src, temp);
    pushI32(temp);

    MemoryAccessDesc access(memoryIndex, Scalar::Int64, 1, offset,
                            trapSiteDesc(), hugeMemoryEnabled(memoryIndex));
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    loadCommon(&access, check, ValType::I64);

    offset += sizeof(uint64_t);
    omitBoundsCheck = true;
  }
#endif

  for (uint32_t i = 0; i < numCopies4; i++) {
    RegI32 temp = needI32();
    moveI32(src, temp);
    pushI32(temp);

    MemoryAccessDesc access(memoryIndex, Scalar::Uint32, 1, offset,
                            trapSiteDesc(), hugeMemoryEnabled(memoryIndex));
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    loadCommon(&access, check, ValType::I32);

    offset += sizeof(uint32_t);
    omitBoundsCheck = true;
  }

  if (numCopies2) {
    RegI32 temp = needI32();
    moveI32(src, temp);
    pushI32(temp);

    MemoryAccessDesc access(memoryIndex, Scalar::Uint16, 1, offset,
                            trapSiteDesc(), hugeMemoryEnabled(memoryIndex));
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    loadCommon(&access, check, ValType::I32);

    offset += sizeof(uint16_t);
    omitBoundsCheck = true;
  }

  if (numCopies1) {
    RegI32 temp = needI32();
    moveI32(src, temp);
    pushI32(temp);

    MemoryAccessDesc access(memoryIndex, Scalar::Uint8, 1, offset,
                            trapSiteDesc(), hugeMemoryEnabled(memoryIndex));
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    loadCommon(&access, check, ValType::I32);
  }

  // Store all source bytes from the value stack to the destination from
  // high to low. We will trap without writing anything on the first store
  // if any dest byte is out-of-bounds.  Stores pop the loaded values in
  // reverse order of the loads above (narrowest width first).
  offset = length;
  omitBoundsCheck = false;

  if (numCopies1) {
    offset -= sizeof(uint8_t);

    RegI32 value = popI32();
    RegI32 temp = needI32();
    moveI32(dest, temp);
    pushI32(temp);
    pushI32(value);

    MemoryAccessDesc access(memoryIndex, Scalar::Uint8, 1, offset,
                            trapSiteDesc(), hugeMemoryEnabled(memoryIndex));
    AccessCheck check;
    storeCommon(&access, check, ValType::I32);

    omitBoundsCheck = true;
  }

  if (numCopies2) {
    offset -= sizeof(uint16_t);

    RegI32 value = popI32();
    RegI32 temp = needI32();
    moveI32(dest, temp);
    pushI32(temp);
    pushI32(value);

    MemoryAccessDesc access(memoryIndex, Scalar::Uint16, 1, offset,
                            trapSiteDesc(), hugeMemoryEnabled(memoryIndex));
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    storeCommon(&access, check, ValType::I32);

    omitBoundsCheck = true;
  }

  for (uint32_t i = 0; i < numCopies4; i++) {
    offset -= sizeof(uint32_t);

    RegI32 value = popI32();
    RegI32 temp = needI32();
    moveI32(dest, temp);
    pushI32(temp);
    pushI32(value);

    MemoryAccessDesc access(memoryIndex, Scalar::Uint32, 1, offset,
                            trapSiteDesc(), hugeMemoryEnabled(memoryIndex));
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    storeCommon(&access, check, ValType::I32);

    omitBoundsCheck = true;
  }

#ifdef JS_64BIT
  for (uint32_t i = 0; i < numCopies8; i++) {
    offset -= sizeof(uint64_t);

    RegI64 value = popI64();
    RegI32 temp = needI32();
    moveI32(dest, temp);
    pushI32(temp);
    pushI64(value);

    MemoryAccessDesc access(memoryIndex, Scalar::Int64, 1, offset,
                            trapSiteDesc(), hugeMemoryEnabled(memoryIndex));
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    storeCommon(&access, check, ValType::I64);

    omitBoundsCheck = true;
  }
#endif

#ifdef ENABLE_WASM_SIMD
  for (uint32_t i = 0; i < numCopies16; i++) {
    offset -= sizeof(V128);

    RegV128 value = popV128();
    RegI32 temp = needI32();
    moveI32(dest, temp);
    pushI32(temp);
    pushV128(value);

    MemoryAccessDesc access(memoryIndex, Scalar::Simd128, 1, offset,
                            trapSiteDesc(), hugeMemoryEnabled(memoryIndex));
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    storeCommon(&access, check, ValType::V128);

    omitBoundsCheck = true;
  }
#endif

  freeI32(dest);
  freeI32(src);
}
   2679 
// Inline expansion of memory.fill for memory index 0 on a 32-bit memory,
// used when both the fill byte and the length are small constants.  Pops
// the constant length and value and the dest address from the value stack
// and emits straight-line stores instead of an instance call.
//
// The byte value is splatted to each store width in use (V128/i64/i32/i16),
// and the stores are issued from the highest offset down: if the
// destination range is out of bounds, the very first store traps before any
// byte is written, and once it succeeds the remaining lower-offset stores
// can skip their bounds checks.
void BaseCompiler::memFillInlineM32() {
  MOZ_ASSERT(MaxInlineMemoryFillLength != 0);

  // This function assumes a memory index of zero.
  uint32_t memoryIndex = 0;
  int32_t signedLength;
  int32_t signedValue;
  MOZ_ALWAYS_TRUE(popConst(&signedLength));
  MOZ_ALWAYS_TRUE(popConst(&signedValue));
  uint32_t length = uint32_t(signedLength);
  uint32_t value = uint32_t(signedValue);
  MOZ_ASSERT(length != 0 && length <= MaxInlineMemoryFillLength);

  RegI32 dest = popI32();

  // Compute the number of copies of each width we will need to do, by
  // greedy decomposition: widest available chunks first.
  size_t remainder = length;
#ifdef ENABLE_WASM_SIMD
  size_t numCopies16 = 0;
  if (MacroAssembler::SupportsFastUnalignedFPAccesses()) {
    numCopies16 = remainder / sizeof(V128);
    remainder %= sizeof(V128);
  }
#endif
#ifdef JS_64BIT
  size_t numCopies8 = remainder / sizeof(uint64_t);
  remainder %= sizeof(uint64_t);
#endif
  size_t numCopies4 = remainder / sizeof(uint32_t);
  remainder %= sizeof(uint32_t);
  size_t numCopies2 = remainder / sizeof(uint16_t);
  remainder %= sizeof(uint16_t);
  size_t numCopies1 = remainder;

  MOZ_ASSERT(numCopies2 <= 1 && numCopies1 <= 1);

  // Generate splatted definitions for wider fills as needed: the fill byte
  // repeated across each store width.
#ifdef ENABLE_WASM_SIMD
  V128 val16(value);
#endif
#ifdef JS_64BIT
  uint64_t val8 = SplatByteToUInt<uint64_t>(value, 8);
#endif
  uint32_t val4 = SplatByteToUInt<uint32_t>(value, 4);
  uint32_t val2 = SplatByteToUInt<uint32_t>(value, 2);
  uint32_t val1 = value;

  // Store the fill value to the destination from high to low. We will trap
  // without writing anything on the first store if any dest byte is
  // out-of-bounds.
  size_t offset = length;
  bool omitBoundsCheck = false;

  if (numCopies1) {
    offset -= sizeof(uint8_t);

    // Each store consumes an address from the value stack, so push a fresh
    // copy of `dest`; the constant `offset` is folded into the access.
    RegI32 temp = needI32();
    moveI32(dest, temp);
    pushI32(temp);
    pushI32(val1);

    MemoryAccessDesc access(memoryIndex, Scalar::Uint8, 1, offset,
                            trapSiteDesc(), hugeMemoryEnabled(memoryIndex));
    AccessCheck check;
    storeCommon(&access, check, ValType::I32);

    omitBoundsCheck = true;
  }

  if (numCopies2) {
    offset -= sizeof(uint16_t);

    RegI32 temp = needI32();
    moveI32(dest, temp);
    pushI32(temp);
    pushI32(val2);

    MemoryAccessDesc access(memoryIndex, Scalar::Uint16, 1, offset,
                            trapSiteDesc(), hugeMemoryEnabled(memoryIndex));
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    storeCommon(&access, check, ValType::I32);

    omitBoundsCheck = true;
  }

  for (uint32_t i = 0; i < numCopies4; i++) {
    offset -= sizeof(uint32_t);

    RegI32 temp = needI32();
    moveI32(dest, temp);
    pushI32(temp);
    pushI32(val4);

    MemoryAccessDesc access(memoryIndex, Scalar::Uint32, 1, offset,
                            trapSiteDesc(), hugeMemoryEnabled(memoryIndex));
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    storeCommon(&access, check, ValType::I32);

    omitBoundsCheck = true;
  }

#ifdef JS_64BIT
  for (uint32_t i = 0; i < numCopies8; i++) {
    offset -= sizeof(uint64_t);

    RegI32 temp = needI32();
    moveI32(dest, temp);
    pushI32(temp);
    pushI64(val8);

    MemoryAccessDesc access(memoryIndex, Scalar::Int64, 1, offset,
                            trapSiteDesc(), hugeMemoryEnabled(memoryIndex));
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    storeCommon(&access, check, ValType::I64);

    omitBoundsCheck = true;
  }
#endif

#ifdef ENABLE_WASM_SIMD
  for (uint32_t i = 0; i < numCopies16; i++) {
    offset -= sizeof(V128);

    RegI32 temp = needI32();
    moveI32(dest, temp);
    pushI32(temp);
    pushV128(val16);

    MemoryAccessDesc access(memoryIndex, Scalar::Simd128, 1, offset,
                            trapSiteDesc(), hugeMemoryEnabled(memoryIndex));
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    storeCommon(&access, check, ValType::V128);

    omitBoundsCheck = true;
  }
#endif

  freeI32(dest);
}
   2823 
   2824 //////////////////////////////////////////////////////////////////////////////
   2825 //
   2826 // SIMD and Relaxed SIMD.
   2827 
   2828 #ifdef ENABLE_WASM_SIMD
   2829 void BaseCompiler::loadSplat(MemoryAccessDesc* access) {
   2830  // We can implement loadSplat mostly as load + splat because the push of the
   2831  // result onto the value stack in loadCommon normally will not generate any
   2832  // code, it will leave the value in a register which we will consume.
   2833 
   2834  // We use uint types when we can on the general assumption that unsigned loads
   2835  // might be smaller/faster on some platforms, because no sign extension needs
   2836  // to be done after the sub-register load.
   2837  RegV128 rd = needV128();
   2838  switch (access->type()) {
   2839    case Scalar::Uint8: {
   2840      loadCommon(access, AccessCheck(), ValType::I32);
   2841      RegI32 rs = popI32();
   2842      masm.splatX16(rs, rd);
   2843      free(rs);
   2844      break;
   2845    }
   2846    case Scalar::Uint16: {
   2847      loadCommon(access, AccessCheck(), ValType::I32);
   2848      RegI32 rs = popI32();
   2849      masm.splatX8(rs, rd);
   2850      free(rs);
   2851      break;
   2852    }
   2853    case Scalar::Uint32: {
   2854      loadCommon(access, AccessCheck(), ValType::I32);
   2855      RegI32 rs = popI32();
   2856      masm.splatX4(rs, rd);
   2857      free(rs);
   2858      break;
   2859    }
   2860    case Scalar::Int64: {
   2861      loadCommon(access, AccessCheck(), ValType::I64);
   2862      RegI64 rs = popI64();
   2863      masm.splatX2(rs, rd);
   2864      free(rs);
   2865      break;
   2866    }
   2867    default:
   2868      MOZ_CRASH();
   2869  }
   2870  pushV128(rd);
   2871 }
   2872 
// Emit a zero-extending v128 load (e.g. v128.load32_zero): the scalar is
// loaded into the low lanes and the rest of the vector is zeroed.  The
// zero-extension is implemented inside the load itself, flagged on the
// access descriptor, so this is just a tagged loadCommon.
void BaseCompiler::loadZero(MemoryAccessDesc* access) {
  access->setZeroExtendSimd128Load();
  loadCommon(access, AccessCheck(), ValType::V128);
}
   2877 
   2878 void BaseCompiler::loadExtend(MemoryAccessDesc* access, Scalar::Type viewType) {
   2879  loadCommon(access, AccessCheck(), ValType::I64);
   2880 
   2881  RegI64 rs = popI64();
   2882  RegV128 rd = needV128();
   2883  masm.moveGPR64ToDouble(rs, rd);
   2884  switch (viewType) {
   2885    case Scalar::Int8:
   2886      masm.widenLowInt8x16(rd, rd);
   2887      break;
   2888    case Scalar::Uint8:
   2889      masm.unsignedWidenLowInt8x16(rd, rd);
   2890      break;
   2891    case Scalar::Int16:
   2892      masm.widenLowInt16x8(rd, rd);
   2893      break;
   2894    case Scalar::Uint16:
   2895      masm.unsignedWidenLowInt16x8(rd, rd);
   2896      break;
   2897    case Scalar::Int32:
   2898      masm.widenLowInt32x4(rd, rd);
   2899      break;
   2900    case Scalar::Uint32:
   2901      masm.unsignedWidenLowInt32x4(rd, rd);
   2902      break;
   2903    default:
   2904      MOZ_CRASH();
   2905  }
   2906  freeI64(rs);
   2907  pushV128(rd);
   2908 }
   2909 
   2910 void BaseCompiler::loadLane(MemoryAccessDesc* access, uint32_t laneIndex) {
   2911  ValType type = access->type() == Scalar::Int64 ? ValType::I64 : ValType::I32;
   2912 
   2913  RegV128 rsd = popV128();
   2914  loadCommon(access, AccessCheck(), type);
   2915 
   2916  if (type == ValType::I32) {
   2917    RegI32 rs = popI32();
   2918    switch (access->type()) {
   2919      case Scalar::Uint8:
   2920        masm.replaceLaneInt8x16(laneIndex, rs, rsd);
   2921        break;
   2922      case Scalar::Uint16:
   2923        masm.replaceLaneInt16x8(laneIndex, rs, rsd);
   2924        break;
   2925      case Scalar::Int32:
   2926        masm.replaceLaneInt32x4(laneIndex, rs, rsd);
   2927        break;
   2928      default:
   2929        MOZ_CRASH("unsupported access type");
   2930    }
   2931    freeI32(rs);
   2932  } else {
   2933    MOZ_ASSERT(type == ValType::I64);
   2934    RegI64 rs = popI64();
   2935    masm.replaceLaneInt64x2(laneIndex, rs, rsd);
   2936    freeI64(rs);
   2937  }
   2938 
   2939  pushV128(rsd);
   2940 }
   2941 
   2942 void BaseCompiler::storeLane(MemoryAccessDesc* access, uint32_t laneIndex) {
   2943  ValType type = access->type() == Scalar::Int64 ? ValType::I64 : ValType::I32;
   2944 
   2945  RegV128 rs = popV128();
   2946  if (type == ValType::I32) {
   2947    RegI32 tmp = needI32();
   2948    switch (access->type()) {
   2949      case Scalar::Uint8:
   2950        masm.extractLaneInt8x16(laneIndex, rs, tmp);
   2951        break;
   2952      case Scalar::Uint16:
   2953        masm.extractLaneInt16x8(laneIndex, rs, tmp);
   2954        break;
   2955      case Scalar::Int32:
   2956        masm.extractLaneInt32x4(laneIndex, rs, tmp);
   2957        break;
   2958      default:
   2959        MOZ_CRASH("unsupported laneSize");
   2960    }
   2961    pushI32(tmp);
   2962  } else {
   2963    MOZ_ASSERT(type == ValType::I64);
   2964    RegI64 tmp = needI64();
   2965    masm.extractLaneInt64x2(laneIndex, rs, tmp);
   2966    pushI64(tmp);
   2967  }
   2968  freeV128(rs);
   2969 
   2970  storeCommon(access, AccessCheck(), type);
   2971 }
   2972 #endif  // ENABLE_WASM_SIMD
   2973 
   2974 }  // namespace wasm
   2975 }  // namespace js