tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

MacroAssembler-arm64.cpp (136379B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "jit/arm64/MacroAssembler-arm64.h"
      8 
      9 #include "mozilla/MathAlgorithms.h"
     10 #include "mozilla/Maybe.h"
     11 
     12 #include "jsmath.h"
     13 
     14 #include "jit/arm64/MoveEmitter-arm64.h"
     15 #include "jit/arm64/SharedICRegisters-arm64.h"
     16 #include "jit/Bailouts.h"
     17 #include "jit/BaselineFrame.h"
     18 #include "jit/JitRuntime.h"
     19 #include "jit/MacroAssembler.h"
     20 #include "jit/ProcessExecutableMemory.h"
     21 #include "util/Memory.h"
     22 #include "vm/BigIntType.h"
     23 #include "vm/JitActivation.h"  // js::jit::JitActivation
     24 #include "vm/JSContext.h"
     25 #include "vm/StringType.h"
     26 #include "wasm/WasmStubs.h"
     27 
     28 #include "jit/MacroAssembler-inl.h"
     29 
     30 namespace js {
     31 namespace jit {
     32 
// Operand width selector for helpers below: 32-bit (W) or 64-bit (X) view of
// a general-purpose register. The enumerator value is the width in bits.
enum class Width { _32 = 32, _64 = 64 };
     34 
     35 static inline ARMRegister X(Register r) { return ARMRegister(r, 64); }
     36 
// View `r` (which may be the stack pointer) as a 64-bit ARM register; goes
// through the MacroAssembler so SP is mapped correctly.
static inline ARMRegister X(MacroAssembler& masm, RegisterOrSP r) {
  return masm.toARMRegister(r, 64);
}
     40 
     41 static inline ARMRegister W(Register r) { return ARMRegister(r, 32); }
     42 
     43 static inline ARMRegister R(Register r, Width w) {
     44  return ARMRegister(r, unsigned(w));
     45 }
     46 
#ifdef DEBUG
// Number of significant low bits in the payload of a boxed Value of `type`.
// Used only by debug assertions: all bits above this count must be zero
// before tagging. Pointer-like payloads use JSVAL_TAG_SHIFT bits; doubles
// and unknown types have no fixed payload size and crash here.
static constexpr int32_t PayloadSize(JSValueType type) {
  switch (type) {
    case JSVAL_TYPE_UNDEFINED:
    case JSVAL_TYPE_NULL:
      return 0;
    case JSVAL_TYPE_BOOLEAN:
      return 1;
    case JSVAL_TYPE_INT32:
    case JSVAL_TYPE_MAGIC:
      return 32;
    case JSVAL_TYPE_STRING:
    case JSVAL_TYPE_SYMBOL:
    case JSVAL_TYPE_PRIVATE_GCTHING:
    case JSVAL_TYPE_BIGINT:
    case JSVAL_TYPE_OBJECT:
      return JSVAL_TAG_SHIFT;
    case JSVAL_TYPE_DOUBLE:
    case JSVAL_TYPE_UNKNOWN:
      break;
  }
  MOZ_CRASH("bad value type");
}
#endif
     71 
// Debug-only: emit code that traps (Brk) if `payload` has any bits set above
// the payload size for `type`, i.e. if tagging it with Orr would corrupt the
// tag bits. `scratch` is clobbered. No-op in release builds.
static void AssertValidPayload(MacroAssemblerCompat& masm, JSValueType type,
                               Register payload, Register scratch) {
#ifdef DEBUG
  // All bits above the payload must be zeroed.
  Label upperBitsZeroed;
  masm.Lsr(ARMRegister(scratch, 64), ARMRegister(payload, 64),
           PayloadSize(type));
  masm.Cbz(ARMRegister(scratch, 64), &upperBitsZeroed);
  masm.breakpoint();
  masm.bind(&upperBitsZeroed);
#endif
}
     84 
// Box `payload` into `dest` by OR-ing in the shifted tag for `type`.
// Valid only for types whose payload occupies the low bits (asserted in
// debug builds); undefined/null have no payload and are rejected.
void MacroAssemblerCompat::tagValue(JSValueType type, Register payload,
                                    ValueOperand dest) {
  MOZ_ASSERT(type != JSVAL_TYPE_UNDEFINED && type != JSVAL_TYPE_NULL);

#ifdef DEBUG
  {
    vixl::UseScratchRegisterScope temps(this);
    Register scratch = temps.AcquireX().asUnsized();

    AssertValidPayload(*this, type, payload, scratch);
  }
#endif

  // dest = payload | (tag << JSVAL_TAG_SHIFT); safe because the payload's
  // upper bits are known zero.
  Orr(ARMRegister(dest.valueReg(), 64), ARMRegister(payload, 64),
      Operand(ImmShiftedTag(type).value));
}
    101 
// Box `src` into `dest` with the static tag for `type`. Unlike tagValue,
// `dest` must differ from `src` (dest doubles as the debug scratch here).
void MacroAssemblerCompat::boxValue(JSValueType type, Register src,
                                    Register dest) {
  MOZ_ASSERT(type != JSVAL_TYPE_UNDEFINED && type != JSVAL_TYPE_NULL);
  MOZ_ASSERT(src != dest);

  // Debug-only trap if src has bits above the payload; clobbers dest, which
  // is about to be overwritten anyway.
  AssertValidPayload(*this, type, src, dest);

  Orr(ARMRegister(dest, 64), ARMRegister(src, 64),
      Operand(ImmShiftedTag(type).value));
}
    112 
// Box `src` into `dest` where the JSValueType is only known at runtime in
// `type`. Computes dest = src | ((JSVAL_TAG_MAX_DOUBLE | type) << JSVAL_TAG_SHIFT),
// i.e. the same tag construction the static overload bakes in via
// ImmShiftedTag. In debug builds, first dispatches on `type` to look up the
// payload size and traps if `src` has bits set above it.
void MacroAssemblerCompat::boxValue(Register type, Register src,
                                    Register dest) {
  MOZ_ASSERT(src != dest);

#ifdef DEBUG
  {
    vixl::UseScratchRegisterScope temps(this);
    Register scratch = temps.AcquireX().asUnsized();

    // Runtime equivalent of PayloadSize(type): branch to a stub that loads
    // the payload bit-count for the matching type into scratch, then fall
    // into the shared upper-bits-zero check. Unknown types trap immediately.
    Label check, isNullOrUndefined, isBoolean, isInt32OrMagic, isPointerSized;

    asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_NULL),
                      &isNullOrUndefined);
    asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_UNDEFINED),
                      &isNullOrUndefined);
    asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_BOOLEAN),
                      &isBoolean);
    asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_INT32),
                      &isInt32OrMagic);
    asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_MAGIC),
                      &isInt32OrMagic);
    asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_STRING),
                      &isPointerSized);
    asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_SYMBOL),
                      &isPointerSized);
    asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_PRIVATE_GCTHING),
                      &isPointerSized);
    asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_BIGINT),
                      &isPointerSized);
    asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_OBJECT),
                      &isPointerSized);
    breakpoint();
    {
      bind(&isNullOrUndefined);
      move32(Imm32(PayloadSize(JSVAL_TYPE_NULL)), scratch);
      jump(&check);
    }
    {
      bind(&isBoolean);
      move32(Imm32(PayloadSize(JSVAL_TYPE_BOOLEAN)), scratch);
      jump(&check);
    }
    {
      bind(&isInt32OrMagic);
      move32(Imm32(PayloadSize(JSVAL_TYPE_INT32)), scratch);
      jump(&check);
    }
    {
      bind(&isPointerSized);
      move32(Imm32(PayloadSize(JSVAL_TYPE_STRING)), scratch);
      // fall-through
    }
    bind(&check);

    // All bits above the payload must be zeroed.
    // (Lsr by 0 leaves src intact, so null/undefined require src == 0.)
    Label upperBitsZeroed;
    Lsr(ARMRegister(scratch, 64), ARMRegister(src, 64),
        ARMRegister(scratch, 64));
    Cbz(ARMRegister(scratch, 64), &upperBitsZeroed);
    breakpoint();
    bind(&upperBitsZeroed);
  }
#endif

  // dest = JSVAL_TAG_MAX_DOUBLE | type (the full tag) ...
  Orr(ARMRegister(dest, 64), ARMRegister(type, 64),
      Operand(JSVAL_TAG_MAX_DOUBLE));
  // ... then dest = src | (tag << JSVAL_TAG_SHIFT).
  Orr(ARMRegister(dest, 64), ARMRegister(src, 64),
      Operand(ARMRegister(dest, 64), vixl::LSL, JSVAL_TAG_SHIFT));
}
    182 
    183 #ifdef ENABLE_WASM_SIMD
    184 bool MacroAssembler::MustMaskShiftCountSimd128(wasm::SimdOp op, int32_t* mask) {
    185  switch (op) {
    186    case wasm::SimdOp::I8x16Shl:
    187    case wasm::SimdOp::I8x16ShrU:
    188    case wasm::SimdOp::I8x16ShrS:
    189      *mask = 7;
    190      break;
    191    case wasm::SimdOp::I16x8Shl:
    192    case wasm::SimdOp::I16x8ShrU:
    193    case wasm::SimdOp::I16x8ShrS:
    194      *mask = 15;
    195      break;
    196    case wasm::SimdOp::I32x4Shl:
    197    case wasm::SimdOp::I32x4ShrU:
    198    case wasm::SimdOp::I32x4ShrS:
    199      *mask = 31;
    200      break;
    201    case wasm::SimdOp::I64x2Shl:
    202    case wasm::SimdOp::I64x2ShrU:
    203    case wasm::SimdOp::I64x2ShrS:
    204      *mask = 63;
    205      break;
    206    default:
    207      MOZ_CRASH("Unexpected shift operation");
    208  }
    209  return true;
    210 }
    211 #endif
    212 
// Convert `input` (double) to an integer clamped into [0, 255], writing the
// result to the 32-bit view of `output`. Fcvtns rounds to nearest (ties to
// even per the instruction semantics); the two Csel steps then clamp high
// and low.
void MacroAssembler::clampDoubleToUint8(FloatRegister input, Register output) {
  ARMRegister dest(output, 32);
  Fcvtns(dest, ARMFPRegister(input, 64));

  {
    vixl::UseScratchRegisterScope temps(this);
    const ARMRegister scratch32 = temps.AcquireW();

    // Clamp to at most 255.
    Mov(scratch32, Operand(0xff));
    Cmp(dest, scratch32);
    Csel(dest, dest, scratch32, LessThan);
  }

  // Clamp to at least 0.
  Cmp(dest, Operand(0));
  Csel(dest, dest, wzr, GreaterThan);
}
    229 
// Downcast this compat layer to the full js::jit::MacroAssembler.
js::jit::MacroAssembler& MacroAssemblerCompat::asMasm() {
  return *static_cast<js::jit::MacroAssembler*>(this);
}
    233 
// Const overload of the downcast above.
const js::jit::MacroAssembler& MacroAssemblerCompat::asMasm() const {
  return *static_cast<const js::jit::MacroAssembler*>(this);
}
    237 
// View this object as the underlying VIXL macro assembler.
vixl::MacroAssembler& MacroAssemblerCompat::asVIXL() {
  return *static_cast<vixl::MacroAssembler*>(this);
}
    241 
// Const overload of the VIXL view above.
const vixl::MacroAssembler& MacroAssemblerCompat::asVIXL() const {
  return *static_cast<const vixl::MacroAssembler*>(this);
}
    245 
// Emit a patchable pointer load into `dest` and bind `label` to it so the
// placeholder can be patched with the label's final address at link time.
void MacroAssemblerCompat::mov(CodeLabel* label, Register dest) {
  BufferOffset bo = movePatchablePtr(ImmWord(/* placeholder */ 0), dest);
  label->patchAt()->bind(bo.getOffset());
  label->setLinkMode(CodeLabel::MoveImmediate);
}
    251 
    252 BufferOffset MacroAssemblerCompat::movePatchablePtr(ImmPtr ptr, Register dest) {
    253  const size_t numInst = 1;           // Inserting one load instruction.
    254  const unsigned numPoolEntries = 2;  // Every pool entry is 4 bytes.
    255  uint8_t* literalAddr = (uint8_t*)(&ptr.value);  // TODO: Should be const.
    256 
    257  // Scratch space for generating the load instruction.
    258  //
    259  // allocLiteralLoadEntry() will use InsertIndexIntoTag() to store a temporary
    260  // index to the corresponding PoolEntry in the instruction itself.
    261  //
    262  // That index will be fixed up later when finishPool()
    263  // walks over all marked loads and calls PatchConstantPoolLoad().
    264  uint32_t instructionScratch = 0;
    265 
    266  // Emit the instruction mask in the scratch space.
    267  // The offset doesn't matter: it will be fixed up later.
    268  vixl::Assembler::ldr((Instruction*)&instructionScratch, ARMRegister(dest, 64),
    269                       0);
    270 
    271  // Add the entry to the pool, fix up the LDR imm19 offset,
    272  // and add the completed instruction to the buffer.
    273  return allocLiteralLoadEntry(numInst, numPoolEntries,
    274                               (uint8_t*)&instructionScratch, literalAddr);
    275 }
    276 
// Emit a patchable literal-pool load of `ptr` into `dest`, returning the
// buffer offset of the LDR so callers can patch the literal later.
BufferOffset MacroAssemblerCompat::movePatchablePtr(ImmWord ptr,
                                                    Register dest) {
  const size_t numInst = 1;           // Inserting one load instruction.
  const unsigned numPoolEntries = 2;  // Every pool entry is 4 bytes.
  uint8_t* literalAddr = (uint8_t*)(&ptr.value);

  // Scratch space for generating the load instruction.
  //
  // allocLiteralLoadEntry() will use InsertIndexIntoTag() to store a temporary
  // index to the corresponding PoolEntry in the instruction itself.
  //
  // That index will be fixed up later when finishPool()
  // walks over all marked loads and calls PatchConstantPoolLoad().
  uint32_t instructionScratch = 0;

  // Emit the instruction mask in the scratch space.
  // The offset doesn't matter: it will be fixed up later.
  vixl::Assembler::ldr((Instruction*)&instructionScratch, ARMRegister(dest, 64),
                       0);

  // Add the entry to the pool, fix up the LDR imm19 offset,
  // and add the completed instruction to the buffer.
  return allocLiteralLoadEntry(numInst, numPoolEntries,
                               (uint8_t*)&instructionScratch, literalAddr);
}
    302 
// Load a private (PrivateValue) slot: on arm64 the private pointer is stored
// as a plain full-width word, so this is just a pointer load.
void MacroAssemblerCompat::loadPrivate(const Address& src, Register dest) {
  loadPtr(src, dest);
}
    306 
// Exception-handling tail: calls HandleException with a stack-allocated
// ResumeFromException, then dispatches on the resume kind it filled in —
// unwinding to the entry frame, a baseline catch/finally block, a forced
// return (baseline or Ion), a bailout, a wasm interpreter entry, or a wasm
// catch handler. Every path that reassigns PSP re-syncs SP before touching
// the stack (see the long comment below). The offset of the post-call
// instruction is reported through `returnValueCheckOffset`.
void MacroAssemblerCompat::handleFailureWithHandlerTail(
    Label* profilerExitTail, Label* bailoutTail,
    uint32_t* returnValueCheckOffset) {
  // Fail rather than silently create wrong code.
  MOZ_RELEASE_ASSERT(GetStackPointer64().Is(PseudoStackPointer64));

  // Reserve space for exception information, rounded up to 8-byte alignment.
  int64_t size = (sizeof(ResumeFromException) + 7) & ~7;
  Sub(PseudoStackPointer64, PseudoStackPointer64, Operand(size));
  syncStackPtr();

  // x0 = address of the ResumeFromException record (the ABI argument).
  MOZ_ASSERT(!x0.Is(PseudoStackPointer64));
  Mov(x0, PseudoStackPointer64);

  // Call the handler.
  using Fn = void (*)(ResumeFromException* rfe);
  asMasm().setupUnalignedABICall(r1);
  asMasm().passABIArg(r0);
  asMasm().callWithABI<Fn, HandleException>(
      ABIType::General, CheckUnsafeCallWithABI::DontCheckHasExitFrame);

  *returnValueCheckOffset = asMasm().currentOffset();

  Label entryFrame;
  Label catch_;
  Label finally;
  Label returnBaseline;
  Label returnIon;
  Label bailout;
  Label wasmInterpEntry;
  Label wasmCatch;

  // Check the `asMasm` calls above didn't mess with the StackPointer identity.
  MOZ_ASSERT(GetStackPointer64().Is(PseudoStackPointer64));

  // Dispatch on the resume kind written by HandleException.
  loadPtr(Address(PseudoStackPointer, ResumeFromException::offsetOfKind()), r0);
  asMasm().branch32(Assembler::Equal, r0,
                    Imm32(ExceptionResumeKind::EntryFrame), &entryFrame);
  asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Catch),
                    &catch_);
  asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Finally),
                    &finally);
  asMasm().branch32(Assembler::Equal, r0,
                    Imm32(ExceptionResumeKind::ForcedReturnBaseline),
                    &returnBaseline);
  asMasm().branch32(Assembler::Equal, r0,
                    Imm32(ExceptionResumeKind::ForcedReturnIon), &returnIon);
  asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Bailout),
                    &bailout);
  asMasm().branch32(Assembler::Equal, r0,
                    Imm32(ExceptionResumeKind::WasmInterpEntry),
                    &wasmInterpEntry);
  asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::WasmCatch),
                    &wasmCatch);

  breakpoint();  // Invalid kind.

  // No exception handler. Load the error value, restore state and return from
  // the entry frame.
  bind(&entryFrame);
  moveValue(MagicValue(JS_ION_ERROR), JSReturnOperand);
  loadPtr(
      Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()),
      FramePointer);
  loadPtr(
      Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()),
      PseudoStackPointer);

  // `retn` does indeed sync the stack pointer, but before doing that it reads
  // from the stack.  Consequently, if we remove this call to syncStackPointer
  // then we take on the requirement to prove that the immediately preceding
  // loadPtr produces a value for PSP which maintains the SP <= PSP invariant.
  // That's a proof burden we don't want to take on.  In general it would be
  // good to move (at some time in the future, not now) to a world where
  // *every* assignment to PSP or SP is followed immediately by a copy into
  // the other register.  That would make all required correctness proofs
  // trivial in the sense that it requires only local inspection of code
  // immediately following (dominated by) any such assignment.
  syncStackPtr();
  retn(Imm32(1 * sizeof(void*)));  // Pop from stack and return.

  // If we found a catch handler, this must be a baseline frame. Restore state
  // and jump to the catch block.
  bind(&catch_);
  loadPtr(Address(PseudoStackPointer, ResumeFromException::offsetOfTarget()),
          r0);
  loadPtr(
      Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()),
      FramePointer);
  loadPtr(
      Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()),
      PseudoStackPointer);
  syncStackPtr();
  Br(x0);

  // If we found a finally block, this must be a baseline frame. Push three
  // values expected by the finally block: the exception, the exception stack,
  // and BooleanValue(true).
  bind(&finally);
  ARMRegister exception = x1;
  Ldr(exception, MemOperand(PseudoStackPointer64,
                            ResumeFromException::offsetOfException()));

  ARMRegister exceptionStack = x2;
  Ldr(exceptionStack,
      MemOperand(PseudoStackPointer64,
                 ResumeFromException::offsetOfExceptionStack()));

  // Load target, frame pointer and the new stack pointer. Note the PSP load
  // must come last since it is the base register of these MemOperands.
  Ldr(x0,
      MemOperand(PseudoStackPointer64, ResumeFromException::offsetOfTarget()));
  Ldr(ARMRegister(FramePointer, 64),
      MemOperand(PseudoStackPointer64,
                 ResumeFromException::offsetOfFramePointer()));
  Ldr(PseudoStackPointer64,
      MemOperand(PseudoStackPointer64,
                 ResumeFromException::offsetOfStackPointer()));
  syncStackPtr();
  push(exception);
  push(exceptionStack);
  pushValue(BooleanValue(true));
  Br(x0);

  // Return BaselineFrame->returnValue() to the caller.
  // Used in debug mode and for GeneratorReturn.
  Label profilingInstrumentation;
  bind(&returnBaseline);
  loadPtr(
      Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()),
      FramePointer);
  loadPtr(
      Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()),
      PseudoStackPointer);
  // See comment further up beginning "`retn` does indeed sync the stack
  // pointer".  That comment applies here too.
  syncStackPtr();
  loadValue(Address(FramePointer, BaselineFrame::reverseOffsetOfReturnValue()),
            JSReturnOperand);
  jump(&profilingInstrumentation);

  // Return the given value to the caller.
  bind(&returnIon);
  loadValue(
      Address(PseudoStackPointer, ResumeFromException::offsetOfException()),
      JSReturnOperand);
  loadPtr(
      Address(PseudoStackPointer, offsetof(ResumeFromException, framePointer)),
      FramePointer);
  loadPtr(
      Address(PseudoStackPointer, offsetof(ResumeFromException, stackPointer)),
      PseudoStackPointer);
  syncStackPtr();

  // If profiling is enabled, then update the lastProfilingFrame to refer to
  // caller frame before returning. This code is shared by ForcedReturnIon
  // and ForcedReturnBaseline.
  bind(&profilingInstrumentation);
  {
    Label skipProfilingInstrumentation;
    AbsoluteAddress addressOfEnabled(
        asMasm().runtime()->geckoProfiler().addressOfEnabled());
    asMasm().branch32(Assembler::Equal, addressOfEnabled, Imm32(0),
                      &skipProfilingInstrumentation);
    jump(profilerExitTail);
    bind(&skipProfilingInstrumentation);
  }

  // Tear down the frame: pop FP and the return address, then return.
  movePtr(FramePointer, PseudoStackPointer);
  syncStackPtr();
  vixl::MacroAssembler::Pop(ARMRegister(FramePointer, 64));

  vixl::MacroAssembler::Pop(vixl::lr);
  syncStackPtr();
  vixl::MacroAssembler::Ret(vixl::lr);

  // If we are bailing out to baseline to handle an exception, jump to the
  // bailout tail stub. Load 1 (true) in x0 (ReturnReg) to indicate success.
  bind(&bailout);
  Ldr(x2, MemOperand(PseudoStackPointer64,
                     ResumeFromException::offsetOfBailoutInfo()));
  Ldr(PseudoStackPointer64,
      MemOperand(PseudoStackPointer64,
                 ResumeFromException::offsetOfStackPointer()));
  syncStackPtr();
  Mov(x0, 1);
  jump(bailoutTail);

  // Reset SP and FP; SP is pointing to the unwound return address to the wasm
  // interpreter entry, so we can just ret().
  bind(&wasmInterpEntry);
  Ldr(x29, MemOperand(PseudoStackPointer64,
                      ResumeFromException::offsetOfFramePointer()));
  Ldr(PseudoStackPointer64,
      MemOperand(PseudoStackPointer64,
                 ResumeFromException::offsetOfStackPointer()));
  syncStackPtr();
  // NOTE(review): x23 appears to be the wasm instance register here, loaded
  // with the interpreter-entry failure sentinel — confirm against
  // wasm::InterpFailInstanceReg's definition.
  Mov(x23, int64_t(wasm::InterpFailInstanceReg));
  ret();

  // Found a wasm catch handler, restore state and jump to it.
  bind(&wasmCatch);
  wasm::GenerateJumpToCatchHandler(asMasm(), PseudoStackPointer, r0, r1);

  MOZ_ASSERT(GetStackPointer64().Is(PseudoStackPointer64));
}
    511 
// Record `framePtr` as the profiler's last profiling frame on the current
// JitActivation, and clear the last profiling call site. Clobbers `scratch`.
void MacroAssemblerCompat::profilerEnterFrame(Register framePtr,
                                              Register scratch) {
  asMasm().loadJSContext(scratch);
  loadPtr(Address(scratch, offsetof(JSContext, profilingActivation_)), scratch);
  storePtr(framePtr,
           Address(scratch, JitActivation::offsetOfLastProfilingFrame()));
  storePtr(ImmPtr(nullptr),
           Address(scratch, JitActivation::offsetOfLastProfilingCallSite()));
}
    521 
// Jump to the shared profiler exit-frame tail stub maintained by the
// JitRuntime.
void MacroAssemblerCompat::profilerExitFrame() {
  jump(asMasm().runtime()->jitRuntime()->getProfilerExitFrameTail());
}
    525 
// Emit a truthiness test for the string in `value`: compares the string's
// length against 0 and returns the condition the caller should branch on
// (NonZero for truthy, Zero for falsy, flipped by `truthy`).
Assembler::Condition MacroAssemblerCompat::testStringTruthy(
    bool truthy, const ValueOperand& value) {
  vixl::UseScratchRegisterScope temps(this);
  const Register scratch = temps.AcquireX().asUnsized();
  const ARMRegister scratch32(scratch, 32);
  const ARMRegister scratch64(scratch, 64);

  MOZ_ASSERT(value.valueReg() != scratch);

  unboxString(value, scratch);
  // Load JSString::length() (32-bit) and set flags against zero.
  Ldr(scratch32, MemOperand(scratch64, JSString::offsetOfLength()));
  Cmp(scratch32, Operand(0));
  return truthy ? Condition::NonZero : Condition::Zero;
}
    540 
// Emit a truthiness test for the BigInt in `value`: a BigInt is falsy iff its
// digit length is 0. Returns the condition the caller should branch on.
Assembler::Condition MacroAssemblerCompat::testBigIntTruthy(
    bool truthy, const ValueOperand& value) {
  vixl::UseScratchRegisterScope temps(this);
  const Register scratch = temps.AcquireX().asUnsized();

  MOZ_ASSERT(value.valueReg() != scratch);

  unboxBigInt(value, scratch);
  load32(Address(scratch, BigInt::offsetOfDigitLength()), scratch);
  cmp32(scratch, Imm32(0));
  return truthy ? Condition::NonZero : Condition::Zero;
}
    553 
// Emit a software breakpoint (BRK).
void MacroAssemblerCompat::breakpoint() {
  // Note, other payloads are possible, but GDB is known to misinterpret them
  // sometimes and iloop on the breakpoint instead of stopping properly.
  Brk(0xf000);
}
    559 
    560 void MacroAssemblerCompat::minMax32(Register lhs, Register rhs, Register dest,
    561                                    bool isMax) {
    562  auto lhs32 = ARMRegister(lhs, 32);
    563  auto rhs32 = vixl::Operand(ARMRegister(rhs, 32));
    564  auto dest32 = ARMRegister(dest, 32);
    565 
    566  if (CPUHas(vixl::CPUFeatures::kCSSC)) {
    567    if (isMax) {
    568      Smax(dest32, lhs32, rhs32);
    569    } else {
    570      Smin(dest32, lhs32, rhs32);
    571    }
    572    return;
    573  }
    574 
    575  auto cond = isMax ? Assembler::GreaterThan : Assembler::LessThan;
    576  Cmp(lhs32, rhs32);
    577  Csel(dest32, lhs32, rhs32, cond);
    578 }
    579 
// Emit dest = isMax ? max(lhs, rhs) : min(lhs, rhs) against an immediate,
// as signed 32-bit values. Special-cases rhs in {0, 1, -1} to get single
// bit-trick or csinc/csinv encodings, and non-add/sub-encodable immediates
// to avoid materializing them twice.
void MacroAssemblerCompat::minMax32(Register lhs, Imm32 rhs, Register dest,
                                    bool isMax) {
  auto lhs32 = ARMRegister(lhs, 32);
  auto rhs32 = vixl::Operand(vixl::IntegerOperand(rhs.value));
  auto dest32 = ARMRegister(dest, 32);

  // With CSSC, a single signed min/max instruction suffices.
  if (CPUHas(vixl::CPUFeatures::kCSSC)) {
    if (isMax) {
      Smax(dest32, lhs32, rhs32);
    } else {
      Smin(dest32, lhs32, rhs32);
    }
    return;
  }

  // max(lhs, 0): dest = lhs & ~(lhs >> 31)
  // min(lhs, 0): dest = lhs & (lhs >> 31)
  if (rhs32.GetImmediate() == 0) {
    if (isMax) {
      Bic(dest32, lhs32, vixl::Operand(lhs32, vixl::ASR, 31));
    } else {
      And(dest32, lhs32, vixl::Operand(lhs32, vixl::ASR, 31));
    }
    return;
  }

  // max(lhs, 1): lhs > 0 ? lhs : 1
  // min(lhs, 1): lhs <= 0 ? lhs : 1
  //
  // Note: Csel emits a single `csinc` instruction when the operand is 1.
  if (rhs32.GetImmediate() == 1) {
    auto cond = isMax ? Assembler::GreaterThan : Assembler::LessThanOrEqual;
    Cmp(lhs32, vixl::Operand(0));
    Csel(dest32, lhs32, rhs32, cond);
    return;
  }

  // max(lhs, -1): lhs >= 0 ? lhs : -1
  // min(lhs, -1): lhs < 0 ? lhs : -1
  //
  // Note: Csel emits a single `csinv` instruction when the operand is -1.
  if (rhs32.GetImmediate() == -1) {
    auto cond = isMax ? Assembler::GreaterThanOrEqual : Assembler::LessThan;
    Cmp(lhs32, vixl::Operand(0));
    Csel(dest32, lhs32, rhs32, cond);
    return;
  }

  auto cond =
      isMax ? Assembler::GreaterThanOrEqual : Assembler::LessThanOrEqual;

  // Use scratch register when immediate can't be encoded in `cmp` instruction.
  // This avoids materializing the immediate twice.
  if (!IsImmAddSub(mozilla::Abs(rhs32.GetImmediate()))) {
    vixl::UseScratchRegisterScope temps(this);
    vixl::Register scratch32 = temps.AcquireW();

    Mov(scratch32, rhs32.GetImmediate());
    Cmp(lhs32, scratch32);
    Csel(dest32, lhs32, vixl::Operand(scratch32), cond);
    return;
  }

  // Encodable immediate: copy lhs, then overwrite with rhs if the compare
  // says rhs wins.
  if (lhs != dest) {
    Mov(dest32, lhs32);
  }
  Label done;
  Cmp(lhs32, rhs32);
  B(&done, cond);
  Mov(dest32, rhs32);
  bind(&done);
}
    652 
    653 void MacroAssemblerCompat::minMaxPtr(Register lhs, Register rhs, Register dest,
    654                                     bool isMax) {
    655  auto lhs64 = ARMRegister(lhs, 64);
    656  auto rhs64 = vixl::Operand(ARMRegister(rhs, 64));
    657  auto dest64 = ARMRegister(dest, 64);
    658 
    659  if (CPUHas(vixl::CPUFeatures::kCSSC)) {
    660    if (isMax) {
    661      Smax(dest64, lhs64, rhs64);
    662    } else {
    663      Smin(dest64, lhs64, rhs64);
    664    }
    665    return;
    666  }
    667 
    668  auto cond = isMax ? Assembler::GreaterThan : Assembler::LessThan;
    669  Cmp(lhs64, rhs64);
    670  Csel(dest64, lhs64, rhs64, cond);
    671 }
    672 
// Emit dest = isMax ? max(lhs, rhs) : min(lhs, rhs) against an immediate,
// as signed 64-bit values. Mirrors the 32-bit overload above, including the
// {0, 1, -1} and non-encodable-immediate special cases.
void MacroAssemblerCompat::minMaxPtr(Register lhs, ImmWord rhs, Register dest,
                                     bool isMax) {
  auto lhs64 = ARMRegister(lhs, 64);
  auto rhs64 = vixl::Operand(vixl::IntegerOperand(rhs.value));
  auto dest64 = ARMRegister(dest, 64);

  // With CSSC, a single signed min/max instruction suffices.
  if (CPUHas(vixl::CPUFeatures::kCSSC)) {
    if (isMax) {
      Smax(dest64, lhs64, rhs64);
    } else {
      Smin(dest64, lhs64, rhs64);
    }
    return;
  }

  // max(lhs, 0): dest = lhs & ~(lhs >> 63)
  // min(lhs, 0): dest = lhs & (lhs >> 63)
  if (rhs64.GetImmediate() == 0) {
    if (isMax) {
      Bic(dest64, lhs64, vixl::Operand(lhs64, vixl::ASR, 63));
    } else {
      And(dest64, lhs64, vixl::Operand(lhs64, vixl::ASR, 63));
    }
    return;
  }

  // max(lhs, 1): lhs > 0 ? lhs : 1
  // min(lhs, 1): lhs <= 0 ? lhs : 1
  //
  // Note: Csel emits a single `csinc` instruction when the operand is 1.
  if (rhs64.GetImmediate() == 1) {
    auto cond = isMax ? Assembler::GreaterThan : Assembler::LessThanOrEqual;
    Cmp(lhs64, vixl::Operand(0));
    Csel(dest64, lhs64, rhs64, cond);
    return;
  }

  // max(lhs, -1): lhs >= 0 ? lhs : -1
  // min(lhs, -1): lhs < 0 ? lhs : -1
  //
  // Note: Csel emits a single `csinv` instruction when the operand is -1.
  if (rhs64.GetImmediate() == -1) {
    auto cond = isMax ? Assembler::GreaterThanOrEqual : Assembler::LessThan;
    Cmp(lhs64, vixl::Operand(0));
    Csel(dest64, lhs64, rhs64, cond);
    return;
  }

  auto cond =
      isMax ? Assembler::GreaterThanOrEqual : Assembler::LessThanOrEqual;

  // Use scratch register when immediate can't be encoded in `cmp` instruction.
  // This avoids materializing the immediate twice.
  if (!IsImmAddSub(mozilla::Abs(rhs64.GetImmediate()))) {
    vixl::UseScratchRegisterScope temps(this);
    vixl::Register scratch64 = temps.AcquireX();

    Mov(scratch64, rhs64.GetImmediate());
    Cmp(lhs64, scratch64);
    Csel(dest64, lhs64, vixl::Operand(scratch64), cond);
    return;
  }

  // Encodable immediate: copy lhs, then overwrite with rhs if the compare
  // says rhs wins.
  if (lhs != dest) {
    Mov(dest64, lhs64);
  }
  Label done;
  Cmp(lhs64, rhs64);
  B(&done, cond);
  Mov(dest64, rhs64);
  bind(&done);
}
    745 
    746 // Either `any` is valid or `sixtyfour` is valid.  Return a 32-bit ARMRegister
    747 // in the first case and an ARMRegister of the desired size in the latter case.
    748 
// Exactly one of `any` / `sixtyfour` is valid (asserted). Return a 32-bit
// view of `any`'s GPR in the first case, or a `size`-bit view of
// `sixtyfour`'s register in the second.
static inline ARMRegister SelectGPReg(AnyRegister any, Register64 sixtyfour,
                                      unsigned size = 64) {
  MOZ_ASSERT(any.isValid() != (sixtyfour != Register64::Invalid()));

  if (sixtyfour == Register64::Invalid()) {
    return ARMRegister(any.gpr(), 32);
  }

  return ARMRegister(sixtyfour.reg, size);
}
    759 
    760 // Assert that `sixtyfour` is invalid and then return an FP register from `any`
    761 // of the desired size.
    762 
// Assert that `sixtyfour` is invalid, then return `any`'s FP register viewed
// at the requested size.
static inline ARMFPRegister SelectFPReg(AnyRegister any, Register64 sixtyfour,
                                        unsigned size) {
  MOZ_ASSERT(sixtyfour == Register64::Invalid());
  return ARMFPRegister(any.fpu(), size);
}
    768 
// Wasm load from memoryBase + ptr (+ access offset). Folds any non-zero
// access offset into a scratch register first, then forwards to the
// MemOperand-based overload which performs the actual load.
void MacroAssemblerCompat::wasmLoadImpl(const wasm::MemoryAccessDesc& access,
                                        Register memoryBase_, Register ptr_,
                                        AnyRegister outany, Register64 out64) {
  // The offset must be representable within the guard pages; out-of-line
  // bounds violations rely on that.
  access.assertOffsetInGuardPages();
  uint32_t offset = access.offset32();

  MOZ_ASSERT(memoryBase_ != ptr_);

  ARMRegister memoryBase(memoryBase_, 64);
  ARMRegister ptr(ptr_, 64);
  if (offset) {
    // Materialize ptr + offset so the load uses a simple [base, index] form.
    vixl::UseScratchRegisterScope temps(this);
    ARMRegister scratch = temps.AcquireX();
    Add(scratch, ptr, Operand(offset));
    MemOperand srcAddr(memoryBase, scratch);
    wasmLoadImpl(access, srcAddr, outany, out64);
  } else {
    MemOperand srcAddr(memoryBase, ptr);
    wasmLoadImpl(access, srcAddr, outany, out64);
  }
}
    790 
// Emit the load itself for a wasm memory access.  Selects the load
// instruction by access type, wraps it in the access's memory barriers, and
// records trap metadata for the (single) instruction that may fault.
void MacroAssemblerCompat::wasmLoadImpl(const wasm::MemoryAccessDesc& access,
                                       MemOperand srcAddr, AnyRegister outany,
                                       Register64 out64) {
 // Splat and widen SIMD loads arrive typed as Float64 (8 bytes read from
 // memory); the SIMD expansion happens after the load, below.
 MOZ_ASSERT_IF(access.isSplatSimd128Load() || access.isWidenSimd128Load(),
               access.type() == Scalar::Float64);

 // NOTE: the generated code must match the assembly code in gen_load in
 // GenerateAtomicOperations.py
 asMasm().memoryBarrierBefore(access.sync());

 // Offset of the faulting instruction, recorded via append() below so the
 // wasm signal handler can attribute a fault to this access.
 FaultingCodeOffset fco;
 switch (access.type()) {
   case Scalar::Int8:
     fco = Ldrsb(SelectGPReg(outany, out64), srcAddr);
     break;
   case Scalar::Uint8:
     fco = Ldrb(SelectGPReg(outany, out64), srcAddr);
     break;
   case Scalar::Int16:
     fco = Ldrsh(SelectGPReg(outany, out64), srcAddr);
     break;
   case Scalar::Uint16:
     fco = Ldrh(SelectGPReg(outany, out64), srcAddr);
     break;
   case Scalar::Int32:
     if (out64 != Register64::Invalid()) {
       // i32 loaded into a 64-bit result: sign-extending load.
       fco = Ldrsw(SelectGPReg(outany, out64), srcAddr);
     } else {
       fco = Ldr(SelectGPReg(outany, out64, 32), srcAddr);
     }
     break;
   case Scalar::Uint32:
     fco = Ldr(SelectGPReg(outany, out64, 32), srcAddr);
     break;
   case Scalar::Int64:
     fco = Ldr(SelectGPReg(outany, out64), srcAddr);
     break;
   case Scalar::Float32:
     // LDR does the right thing also for access.isZeroExtendSimd128Load()
     fco = Ldr(SelectFPReg(outany, out64, 32), srcAddr);
     break;
   case Scalar::Float64:
     if (access.isSplatSimd128Load() || access.isWidenSimd128Load()) {
       // Load 64 bits into a scratch vector register, then either splat the
       // doubleword to both lanes or widen its elements into the output.
       ScratchSimd128Scope scratch_(asMasm());
       ARMFPRegister scratch = Simd1D(scratch_);
       fco = Ldr(scratch, srcAddr);
       if (access.isSplatSimd128Load()) {
         Dup(SelectFPReg(outany, out64, 128).V2D(), scratch, 0);
       } else {
         MOZ_ASSERT(access.isWidenSimd128Load());
         switch (access.widenSimdOp()) {
           case wasm::SimdOp::V128Load8x8S:
             Sshll(SelectFPReg(outany, out64, 128).V8H(), scratch.V8B(), 0);
             break;
           case wasm::SimdOp::V128Load8x8U:
             Ushll(SelectFPReg(outany, out64, 128).V8H(), scratch.V8B(), 0);
             break;
           case wasm::SimdOp::V128Load16x4S:
             Sshll(SelectFPReg(outany, out64, 128).V4S(), scratch.V4H(), 0);
             break;
           case wasm::SimdOp::V128Load16x4U:
             Ushll(SelectFPReg(outany, out64, 128).V4S(), scratch.V4H(), 0);
             break;
           case wasm::SimdOp::V128Load32x2S:
             Sshll(SelectFPReg(outany, out64, 128).V2D(), scratch.V2S(), 0);
             break;
           case wasm::SimdOp::V128Load32x2U:
             Ushll(SelectFPReg(outany, out64, 128).V2D(), scratch.V2S(), 0);
             break;
           default:
             MOZ_CRASH("Unexpected widening op for wasmLoad");
         }
       }
     } else {
       // LDR does the right thing also for access.isZeroExtendSimd128Load()
       fco = Ldr(SelectFPReg(outany, out64, 64), srcAddr);
     }
     break;
   case Scalar::Simd128:
     fco = Ldr(SelectFPReg(outany, out64, 128), srcAddr);
     break;
   case Scalar::Uint8Clamped:
   case Scalar::BigInt64:
   case Scalar::BigUint64:
   case Scalar::Float16:
   case Scalar::MaxTypedArrayViewType:
     MOZ_CRASH("unexpected array type");
 }

 // Attach the trap metadata to the faulting instruction.
 append(access, wasm::TrapMachineInsnForLoad(byteSize(access.type())), fco);

 asMasm().memoryBarrierAfter(access.sync());
}
    884 
    885 // Return true if `address` can be represented as an immediate (possibly scaled
    886 // by the access size) in an LDR/STR type instruction.
    887 //
    888 // For more about the logic here, see vixl::MacroAssembler::LoadStoreMacro().
    889 static bool IsLSImmediateOffset(uint64_t address, size_t accessByteSize) {
    890  // The predicates below operate on signed values only.
    891  if (address > INT64_MAX) {
    892    return false;
    893  }
    894 
    895  // The access size is always a power of 2, so computing the log amounts to
    896  // counting trailing zeroes.
    897  unsigned logAccessSize = mozilla::CountTrailingZeroes32(accessByteSize);
    898  return (MacroAssemblerCompat::IsImmLSUnscaled(int64_t(address)) ||
    899          MacroAssemblerCompat::IsImmLSScaled(int64_t(address), logAccessSize));
    900 }
    901 
// Emit a wasm load from an absolute address (memoryBase + constant).  When the
// constant cannot be encoded as an LDR immediate offset, it is materialized in
// a scratch register and a [base, index] operand is used instead.
void MacroAssemblerCompat::wasmLoadAbsolute(
   const wasm::MemoryAccessDesc& access, Register memoryBase, uint64_t address,
   AnyRegister output, Register64 out64) {
 if (!IsLSImmediateOffset(address, access.byteSize())) {
   // The access will require the constant to be loaded into a temp register.
   // Do so here, to keep the logic in wasmLoadImpl() tractable wrt emitting
   // trap information.
   //
   // Almost all constant addresses will in practice be handled by a single MOV
   // so do not worry about additional optimizations here.
   vixl::UseScratchRegisterScope temps(this);
   ARMRegister scratch = temps.AcquireX();
   Mov(scratch, address);
   MemOperand srcAddr(X(memoryBase), scratch);
   wasmLoadImpl(access, srcAddr, output, out64);
 } else {
   MemOperand srcAddr(X(memoryBase), address);
   wasmLoadImpl(access, srcAddr, output, out64);
 }
}
    922 
// Emit a wasm store of `valany`/`val64` (exactly one valid) to
// (memoryBase_ + ptr_ + access offset).  Mirrors wasmLoadImpl above: a
// non-zero constant offset is folded into a scratch index register first.
void MacroAssemblerCompat::wasmStoreImpl(const wasm::MemoryAccessDesc& access,
                                        AnyRegister valany, Register64 val64,
                                        Register memoryBase_, Register ptr_) {
 access.assertOffsetInGuardPages();
 uint32_t offset = access.offset32();

 ARMRegister memoryBase(memoryBase_, 64);
 ARMRegister ptr(ptr_, 64);
 if (offset) {
   // The scratch scope must stay live across the delegated emission below.
   vixl::UseScratchRegisterScope temps(this);
   ARMRegister scratch = temps.AcquireX();
   Add(scratch, ptr, Operand(offset));
   MemOperand destAddr(memoryBase, scratch);
   wasmStoreImpl(access, destAddr, valany, val64);
 } else {
   MemOperand destAddr(memoryBase, ptr);
   wasmStoreImpl(access, destAddr, valany, val64);
 }
}
    942 
// Emit the store itself for a wasm memory access: select the store
// instruction by access type, wrap it in the access's memory barriers, and
// record trap metadata for the (single) instruction that may fault.
void MacroAssemblerCompat::wasmStoreImpl(const wasm::MemoryAccessDesc& access,
                                        MemOperand dstAddr, AnyRegister valany,
                                        Register64 val64) {
 // NOTE: the generated code must match the assembly code in gen_store in
 // GenerateAtomicOperations.py
 asMasm().memoryBarrierBefore(access.sync());

 // Offset of the faulting instruction, recorded via append() below so the
 // wasm signal handler can attribute a fault to this access.
 FaultingCodeOffset fco;
 switch (access.type()) {
   case Scalar::Int8:
   case Scalar::Uint8:
     fco = Strb(SelectGPReg(valany, val64), dstAddr);
     break;
   case Scalar::Int16:
   case Scalar::Uint16:
     fco = Strh(SelectGPReg(valany, val64), dstAddr);
     break;
   case Scalar::Int32:
   case Scalar::Uint32:
     // NOTE(review): this relies on 32-bit values arriving in `valany`, which
     // SelectGPReg maps to a W register; a val64 here would store 64 bits --
     // confirm against callers.
     fco = Str(SelectGPReg(valany, val64), dstAddr);
     break;
   case Scalar::Int64:
     fco = Str(SelectGPReg(valany, val64), dstAddr);
     break;
   case Scalar::Float32:
     fco = Str(SelectFPReg(valany, val64, 32), dstAddr);
     break;
   case Scalar::Float64:
     fco = Str(SelectFPReg(valany, val64, 64), dstAddr);
     break;
   case Scalar::Simd128:
     fco = Str(SelectFPReg(valany, val64, 128), dstAddr);
     break;
   case Scalar::Uint8Clamped:
   case Scalar::BigInt64:
   case Scalar::BigUint64:
   case Scalar::Float16:
   case Scalar::MaxTypedArrayViewType:
     MOZ_CRASH("unexpected array type");
 }

 // Attach the trap metadata to the faulting instruction.
 append(access, wasm::TrapMachineInsnForStore(byteSize(access.type())), fco);

 asMasm().memoryBarrierAfter(access.sync());
}
    988 
    989 void MacroAssemblerCompat::wasmStoreAbsolute(
    990    const wasm::MemoryAccessDesc& access, AnyRegister value, Register64 value64,
    991    Register memoryBase, uint64_t address) {
    992  // See comments in wasmLoadAbsolute.
    993  unsigned logAccessSize = mozilla::CountTrailingZeroes32(access.byteSize());
    994  if (address > INT64_MAX || !(IsImmLSScaled(int64_t(address), logAccessSize) ||
    995                               IsImmLSUnscaled(int64_t(address)))) {
    996    vixl::UseScratchRegisterScope temps(this);
    997    ARMRegister scratch = temps.AcquireX();
    998    Mov(scratch, address);
    999    MemOperand destAddr(X(memoryBase), scratch);
   1000    wasmStoreImpl(access, destAddr, value, value64);
   1001  } else {
   1002    MemOperand destAddr(X(memoryBase), address);
   1003    wasmStoreImpl(access, destAddr, value, value64);
   1004  }
   1005 }
   1006 
// Emit a lane-wise 128-bit SIMD integer comparison of `lhs` against `rhs`
// into `dest`.  Signed conditions use CMGT/CMGE, unsigned ones CMHI/CMHS;
// "less" forms are expressed by swapping the operands of the "greater" forms.
void MacroAssemblerCompat::compareSimd128Int(Assembler::Condition cond,
                                            ARMFPRegister dest,
                                            ARMFPRegister lhs,
                                            ARMFPRegister rhs) {
 switch (cond) {
   case Assembler::Equal:
     Cmeq(dest, lhs, rhs);
     break;
   case Assembler::NotEqual:
     // No direct "not equal" compare: compare equal, then invert the bits.
     Cmeq(dest, lhs, rhs);
     Mvn(dest, dest);
     break;
   case Assembler::GreaterThan:
     Cmgt(dest, lhs, rhs);
     break;
   case Assembler::GreaterThanOrEqual:
     Cmge(dest, lhs, rhs);
     break;
   case Assembler::LessThan:
     // a < b  <=>  b > a.
     Cmgt(dest, rhs, lhs);
     break;
   case Assembler::LessThanOrEqual:
     Cmge(dest, rhs, lhs);
     break;
   case Assembler::Above:
     Cmhi(dest, lhs, rhs);
     break;
   case Assembler::AboveOrEqual:
     Cmhs(dest, lhs, rhs);
     break;
   case Assembler::Below:
     // Unsigned a < b  <=>  b > a.
     Cmhi(dest, rhs, lhs);
     break;
   case Assembler::BelowOrEqual:
     Cmhs(dest, rhs, lhs);
     break;
   default:
     MOZ_CRASH("Unexpected SIMD integer condition");
 }
}
   1047 
   1048 void MacroAssemblerCompat::compareSimd128Float(Assembler::Condition cond,
   1049                                               ARMFPRegister dest,
   1050                                               ARMFPRegister lhs,
   1051                                               ARMFPRegister rhs) {
   1052  switch (cond) {
   1053    case Assembler::Equal:
   1054      Fcmeq(dest, lhs, rhs);
   1055      break;
   1056    case Assembler::NotEqual:
   1057      Fcmeq(dest, lhs, rhs);
   1058      Mvn(dest, dest);
   1059      break;
   1060    case Assembler::GreaterThan:
   1061      Fcmgt(dest, lhs, rhs);
   1062      break;
   1063    case Assembler::GreaterThanOrEqual:
   1064      Fcmge(dest, lhs, rhs);
   1065      break;
   1066    case Assembler::LessThan:
   1067      Fcmgt(dest, rhs, lhs);
   1068      break;
   1069    case Assembler::LessThanOrEqual:
   1070      Fcmge(dest, rhs, lhs);
   1071      break;
   1072    default:
   1073      MOZ_CRASH("Unexpected SIMD integer condition");
   1074  }
   1075 }
   1076 
   1077 void MacroAssemblerCompat::rightShiftInt8x16(FloatRegister lhs, Register rhs,
   1078                                             FloatRegister dest,
   1079                                             bool isUnsigned) {
   1080  ScratchSimd128Scope scratch_(asMasm());
   1081  ARMFPRegister shift = Simd16B(scratch_);
   1082 
   1083  Dup(shift, ARMRegister(rhs, 32));
   1084  Neg(shift, shift);
   1085 
   1086  if (isUnsigned) {
   1087    Ushl(Simd16B(dest), Simd16B(lhs), shift);
   1088  } else {
   1089    Sshl(Simd16B(dest), Simd16B(lhs), shift);
   1090  }
   1091 }
   1092 
   1093 void MacroAssemblerCompat::rightShiftInt16x8(FloatRegister lhs, Register rhs,
   1094                                             FloatRegister dest,
   1095                                             bool isUnsigned) {
   1096  ScratchSimd128Scope scratch_(asMasm());
   1097  ARMFPRegister shift = Simd8H(scratch_);
   1098 
   1099  Dup(shift, ARMRegister(rhs, 32));
   1100  Neg(shift, shift);
   1101 
   1102  if (isUnsigned) {
   1103    Ushl(Simd8H(dest), Simd8H(lhs), shift);
   1104  } else {
   1105    Sshl(Simd8H(dest), Simd8H(lhs), shift);
   1106  }
   1107 }
   1108 
   1109 void MacroAssemblerCompat::rightShiftInt32x4(FloatRegister lhs, Register rhs,
   1110                                             FloatRegister dest,
   1111                                             bool isUnsigned) {
   1112  ScratchSimd128Scope scratch_(asMasm());
   1113  ARMFPRegister shift = Simd4S(scratch_);
   1114 
   1115  Dup(shift, ARMRegister(rhs, 32));
   1116  Neg(shift, shift);
   1117 
   1118  if (isUnsigned) {
   1119    Ushl(Simd4S(dest), Simd4S(lhs), shift);
   1120  } else {
   1121    Sshl(Simd4S(dest), Simd4S(lhs), shift);
   1122  }
   1123 }
   1124 
   1125 void MacroAssemblerCompat::rightShiftInt64x2(FloatRegister lhs, Register rhs,
   1126                                             FloatRegister dest,
   1127                                             bool isUnsigned) {
   1128  ScratchSimd128Scope scratch_(asMasm());
   1129  ARMFPRegister shift = Simd2D(scratch_);
   1130 
   1131  Dup(shift, ARMRegister(rhs, 64));
   1132  Neg(shift, shift);
   1133 
   1134  if (isUnsigned) {
   1135    Ushl(Simd2D(dest), Simd2D(lhs), shift);
   1136  } else {
   1137    Sshl(Simd2D(dest), Simd2D(lhs), shift);
   1138  }
   1139 }
   1140 
// Claim `amount` bytes of stack and record the growth in the tracked frame
// size.
void MacroAssembler::reserveStack(uint32_t amount) {
 // TODO: This bumps |sp| every time we reserve using a second register.
 // It would save some instructions if we had a fixed frame size.
 vixl::MacroAssembler::Claim(Operand(amount));
 adjustFrame(amount);
}
   1147 
// Push either the stack pointer (when `reg` encodes the hidden SP) or a plain
// GPR, and account for the slot in the tracked frame size.
void MacroAssembler::Push(RegisterOrSP reg) {
 if (IsHiddenSP(reg)) {
   push(sp);
 } else {
   push(AsRegister(reg));
 }
 adjustFrame(sizeof(intptr_t));
}
   1156 
   1157 //{{{ check_macroassembler_style
   1158 // ===============================================================
   1159 // MacroAssembler high-level usage.
   1160 
// Delegate to Assembler::flush().
void MacroAssembler::flush() { Assembler::flush(); }
   1162 
   1163 // ===============================================================
   1164 // Stack manipulation functions.
   1165 
   1166 // Routines for saving/restoring registers on the stack.  The format is:
   1167 //
   1168 //   (highest address)
   1169 //
   1170 //   integer (X) regs in any order      size: 8 * # int regs
   1171 //
   1172 //   if # int regs is odd,
   1173 //     then an 8 byte alignment hole    size: 0 or 8
   1174 //
   1175 //   double (D) regs in any order       size: 8 * # double regs
   1176 //
   1177 //   if # double regs is odd,
   1178 //     then an 8 byte alignment hole    size: 0 or 8
   1179 //
   1180 //   vector (Q) regs in any order       size: 16 * # vector regs
   1181 //
   1182 //   (lowest address)
   1183 //
   1184 // Hence the size of the save area is 0 % 16.  And, provided that the base
   1185 // (highest) address is 16-aligned, then the vector reg save/restore accesses
   1186 // will also be 16-aligned, as will pairwise operations for the double regs.
   1187 //
   1188 // Implied by this is that the format of the double and vector dump area
   1189 // corresponds with what FloatRegister::GetPushSizeInBytes computes.
   1190 // See block comment in MacroAssembler.h for more details.
   1191 
   1192 size_t MacroAssembler::PushRegsInMaskSizeInBytes(LiveRegisterSet set) {
   1193  size_t numIntRegs = set.gprs().size();
   1194  return ((numIntRegs + 1) & ~1) * sizeof(intptr_t) +
   1195         FloatRegister::GetPushSizeInBytes(set.fpus());
   1196 }
   1197 
   1198 // Generate code to dump the values in `set`, either on the stack if `dest` is
   1199 // `Nothing` or working backwards from the address denoted by `dest` if it is
   1200 // `Some`.  These two cases are combined so as to minimise the chance of
   1201 // mistakenly generating different formats for the same `set`, given that the
   1202 // `Some` `dest` case is used extremely rarely.
// `masm`: the assembler to emit into.  `set`: the registers to dump.
// `dest`: Nothing => push onto the stack; Some => store working backwards
// from dest->offset, which is decremented in place as each value is written.
static void PushOrStoreRegsInMask(MacroAssembler* masm, LiveRegisterSet set,
                                 mozilla::Maybe<Address> dest) {
 static_assert(sizeof(FloatRegisters::RegisterContent) == 16);

 // If we're saving to arbitrary memory, check the destination is big enough.
 if (dest) {
   mozilla::DebugOnly<size_t> bytesRequired =
       MacroAssembler::PushRegsInMaskSizeInBytes(set);
   MOZ_ASSERT(dest->offset >= 0);
   MOZ_ASSERT(((size_t)dest->offset) >= bytesRequired);
 }

 // Note the high limit point; we'll check it again later.
 mozilla::DebugOnly<size_t> maxExtentInitial =
     dest ? dest->offset : masm->framePushed();

 // Gather up the integer registers in groups of four, and either push each
 // group as a single transfer so as to minimise the number of stack pointer
 // changes, or write them individually to memory.  Take care to ensure the
 // space used remains 16-aligned.
 for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();) {
   vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg, vixl::NoCPUReg,
                               vixl::NoCPUReg};
   size_t i;
   for (i = 0; i < 4 && iter.more(); i++) {
     src[i] = ARMRegister(*iter, 64);
     ++iter;
   }
   MOZ_ASSERT(i > 0);

   if (i == 1 || i == 3) {
     // Ensure the stack remains 16-aligned
     MOZ_ASSERT(!iter.more());
     src[i] = vixl::xzr;
     i++;
   }
   MOZ_ASSERT(i == 2 || i == 4);

   if (dest) {
     // Memory mode: store each register individually, walking downwards.
     for (size_t j = 0; j < i; j++) {
       Register ireg = Register::FromCode(src[j].IsZero() ? Registers::xzr
                                                          : src[j].code());
       dest->offset -= sizeof(intptr_t);
       masm->storePtr(ireg, *dest);
     }
   } else {
     masm->adjustFrame(i * 8);
     masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]);
   }
 }

 // Now the same for the FP double registers.  Note that because of how
 // ReduceSetForPush works, an underlying AArch64 SIMD/FP register can either
 // be present as a double register, or as a V128 register, but not both.
 // Firstly, round up the registers to be pushed.

 FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());
 vixl::CPURegister allSrcs[FloatRegisters::TotalPhys];
 size_t numAllSrcs = 0;

 for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) {
   FloatRegister reg = *iter;
   if (reg.isDouble()) {
     MOZ_RELEASE_ASSERT(numAllSrcs < FloatRegisters::TotalPhys);
     allSrcs[numAllSrcs] = ARMFPRegister(reg, 64);
     numAllSrcs++;
   } else {
     MOZ_ASSERT(reg.isSimd128());
   }
 }
 MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys);

 if ((numAllSrcs & 1) == 1) {
   // We've got an odd number of doubles.  In order to maintain 16-alignment,
   // push the last register twice.  We'll skip over the duplicate in
   // PopRegsInMaskIgnore.
   allSrcs[numAllSrcs] = allSrcs[numAllSrcs - 1];
   numAllSrcs++;
 }
 MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys);
 MOZ_RELEASE_ASSERT((numAllSrcs & 1) == 0);

 // And now generate the transfers.
 size_t i;
 if (dest) {
   for (i = 0; i < numAllSrcs; i++) {
     FloatRegister freg =
         FloatRegister(FloatRegisters::FPRegisterID(allSrcs[i].code()),
                       FloatRegisters::Kind::Double);
     dest->offset -= sizeof(double);
     masm->storeDouble(freg, *dest);
   }
 } else {
   // Push in groups of at most four, as for the integer registers.
   i = 0;
   while (i < numAllSrcs) {
     vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg,
                                 vixl::NoCPUReg, vixl::NoCPUReg};
     size_t j;
     for (j = 0; j < 4 && j + i < numAllSrcs; j++) {
       src[j] = allSrcs[j + i];
     }
     masm->adjustFrame(8 * j);
     masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]);
     i += j;
   }
 }
 MOZ_ASSERT(i == numAllSrcs);

 // Finally, deal with the SIMD (V128) registers.  This is a bit simpler
 // as there's no need for special-casing to maintain 16-alignment.

 numAllSrcs = 0;
 for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) {
   FloatRegister reg = *iter;
   if (reg.isSimd128()) {
     MOZ_RELEASE_ASSERT(numAllSrcs < FloatRegisters::TotalPhys);
     allSrcs[numAllSrcs] = ARMFPRegister(reg, 128);
     numAllSrcs++;
   }
 }
 MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys);

 // Generate the transfers.
 if (dest) {
   for (i = 0; i < numAllSrcs; i++) {
     FloatRegister freg =
         FloatRegister(FloatRegisters::FPRegisterID(allSrcs[i].code()),
                       FloatRegisters::Kind::Simd128);
     dest->offset -= FloatRegister::SizeOfSimd128;
     masm->storeUnalignedSimd128(freg, *dest);
   }
 } else {
   i = 0;
   while (i < numAllSrcs) {
     vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg,
                                 vixl::NoCPUReg, vixl::NoCPUReg};
     size_t j;
     for (j = 0; j < 4 && j + i < numAllSrcs; j++) {
       src[j] = allSrcs[j + i];
     }
     masm->adjustFrame(16 * j);
     masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]);
     i += j;
   }
 }
 MOZ_ASSERT(i == numAllSrcs);

 // Final overrun check.
 if (dest) {
   MOZ_ASSERT(maxExtentInitial - dest->offset ==
              MacroAssembler::PushRegsInMaskSizeInBytes(set));
 } else {
   MOZ_ASSERT(masm->framePushed() - maxExtentInitial ==
              MacroAssembler::PushRegsInMaskSizeInBytes(set));
 }
}
   1359 
// Push every register in `set` onto the stack, in the save-area format
// described in the block comment above.
void MacroAssembler::PushRegsInMask(LiveRegisterSet set) {
 PushOrStoreRegsInMask(this, set, mozilla::Nothing());
}
   1363 
// Store every register in `set` to memory, working backwards from `dest`.
// `scratch` is not needed by the arm64 implementation.
void MacroAssembler::storeRegsInMask(LiveRegisterSet set, Address dest,
                                    Register scratch) {
 PushOrStoreRegsInMask(this, set, mozilla::Some(dest));
}
   1368 
// This is a helper function for PopRegsInMaskIgnore below.  It emits the
// loads described by dests[0] and [1] and offsets[0] and [1], generating a
// load-pair if it can.  `transactionSize` is the byte size of one access
// (callers pass 8 for X/D registers, 16 for Q); two queued loads fuse into
// an LDP only when their offsets are exactly adjacent.  On return the queue
// is reset to empty.
static void GeneratePendingLoadsThenFlush(MacroAssembler* masm,
                                         vixl::CPURegister* dests,
                                         uint32_t* offsets,
                                         uint32_t transactionSize) {
 // Generate the loads ..
 if (!dests[0].IsNone()) {
   if (!dests[1].IsNone()) {
     // [0] and [1] both present.
     if (offsets[0] + transactionSize == offsets[1]) {
       masm->Ldp(dests[0], dests[1],
                 MemOperand(masm->GetStackPointer64(), offsets[0]));
     } else {
       // Theoretically we could check for a load-pair with the destinations
       // switched, but our callers will never generate that.  Hence there's
       // no loss in giving up at this point and generating two loads.
       masm->Ldr(dests[0], MemOperand(masm->GetStackPointer64(), offsets[0]));
       masm->Ldr(dests[1], MemOperand(masm->GetStackPointer64(), offsets[1]));
     }
   } else {
     // [0] only.
     masm->Ldr(dests[0], MemOperand(masm->GetStackPointer64(), offsets[0]));
   }
 } else {
   if (!dests[1].IsNone()) {
     // [1] only.  Can't happen because callers always fill [0] before [1].
     MOZ_CRASH("GenerateLoadsThenFlush");
   } else {
     // Neither entry valid.  This can happen.
   }
 }

 // .. and flush.
 dests[0] = dests[1] = vixl::NoCPUReg;
 offsets[0] = offsets[1] = 0;
}
   1407 
// Restore the registers in `set` from the stack (in the layout produced by
// PushRegsInMask), skipping the loads -- but not the stack space -- for
// registers present in `ignore`, then free the whole save area.
void MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set,
                                        LiveRegisterSet ignore) {
 mozilla::DebugOnly<size_t> framePushedInitial = framePushed();

 // The offset of the data from the stack pointer.
 uint32_t offset = 0;

 // The set of FP/SIMD registers we need to restore.
 FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());

 // The set of registers to ignore.  BroadcastToAllSizes() is used to avoid
 // any ambiguities arising from (eg) `fpuSet` containing q17 but `ignore`
 // containing d17.
 FloatRegisterSet ignoreFpusBroadcasted(
     FloatRegister::BroadcastToAllSizes(ignore.fpus()));

 // First recover the SIMD (V128) registers.  This is straightforward in that
 // we don't need to think about alignment holes.

 // These three form a two-entry queue that holds loads that we know we
 // need, but which we haven't yet emitted.
 vixl::CPURegister pendingDests[2] = {vixl::NoCPUReg, vixl::NoCPUReg};
 uint32_t pendingOffsets[2] = {0, 0};
 size_t nPending = 0;

 for (FloatRegisterIterator iter(fpuSet); iter.more(); ++iter) {
   FloatRegister reg = *iter;
   if (reg.isDouble()) {
     continue;
   }
   MOZ_RELEASE_ASSERT(reg.isSimd128());

   uint32_t offsetForReg = offset;
   offset += FloatRegister::SizeOfSimd128;

   // Ignored registers still occupy a slot: skip the load, not the space.
   if (ignoreFpusBroadcasted.hasRegisterIndex(reg)) {
     continue;
   }

   MOZ_ASSERT(nPending <= 2);
   if (nPending == 2) {
     GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 16);
     nPending = 0;
   }
   pendingDests[nPending] = ARMFPRegister(reg, 128);
   pendingOffsets[nPending] = offsetForReg;
   nPending++;
 }
 GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 16);
 nPending = 0;

 MOZ_ASSERT((offset % 16) == 0);

 // Now recover the FP double registers.  This is more tricky in that we need
 // to skip over the lowest-addressed of them if the number of them was odd.

 if ((((fpuSet.bits() & FloatRegisters::AllDoubleMask).size()) & 1) == 1) {
   // Skip the duplicate double that PushOrStoreRegsInMask pushed to keep
   // the area 16-aligned.
   offset += sizeof(double);
 }

 for (FloatRegisterIterator iter(fpuSet); iter.more(); ++iter) {
   FloatRegister reg = *iter;
   if (reg.isSimd128()) {
     continue;
   }
   /* true but redundant, per loop above: MOZ_RELEASE_ASSERT(reg.isDouble()) */

   uint32_t offsetForReg = offset;
   offset += sizeof(double);

   if (ignoreFpusBroadcasted.hasRegisterIndex(reg)) {
     continue;
   }

   MOZ_ASSERT(nPending <= 2);
   if (nPending == 2) {
     GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);
     nPending = 0;
   }
   pendingDests[nPending] = ARMFPRegister(reg, 64);
   pendingOffsets[nPending] = offsetForReg;
   nPending++;
 }
 GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);
 nPending = 0;

 MOZ_ASSERT((offset % 16) == 0);
 MOZ_ASSERT(offset == set.fpus().getPushSizeInBytes());

 // And finally recover the integer registers, again skipping an alignment
 // hole if it exists.

 if ((set.gprs().size() & 1) == 1) {
   offset += sizeof(uint64_t);
 }

 for (GeneralRegisterIterator iter(set.gprs()); iter.more(); ++iter) {
   Register reg = *iter;

   uint32_t offsetForReg = offset;
   offset += sizeof(uint64_t);

   if (ignore.has(reg)) {
     continue;
   }

   MOZ_ASSERT(nPending <= 2);
   if (nPending == 2) {
     GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);
     nPending = 0;
   }
   pendingDests[nPending] = ARMRegister(reg, 64);
   pendingOffsets[nPending] = offsetForReg;
   nPending++;
 }
 GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);

 MOZ_ASSERT((offset % 16) == 0);

 // All loads done: free the whole save area in one go.
 size_t bytesPushed = PushRegsInMaskSizeInBytes(set);
 MOZ_ASSERT(offset == bytesPushed);
 freeStack(bytesPushed);
}
   1531 
// Push a GPR and account for the slot in the tracked frame size.
void MacroAssembler::Push(Register reg) {
 push(reg);
 adjustFrame(sizeof(intptr_t));
}
   1536 
// Push four GPRs in one operation and account for all four slots.
void MacroAssembler::Push(Register reg1, Register reg2, Register reg3,
                         Register reg4) {
 push(reg1, reg2, reg3, reg4);
 adjustFrame(4 * sizeof(intptr_t));
}
   1542 
// Push a 32-bit immediate (as a full slot) and track the frame growth.
void MacroAssembler::Push(const Imm32 imm) {
 push(imm);
 adjustFrame(sizeof(intptr_t));
}
   1547 
// Push a word-sized immediate and track the frame growth.
void MacroAssembler::Push(const ImmWord imm) {
 push(imm);
 adjustFrame(sizeof(intptr_t));
}
   1552 
// Push a pointer-sized immediate and track the frame growth.
void MacroAssembler::Push(const ImmPtr imm) {
 push(imm);
 adjustFrame(sizeof(intptr_t));
}
   1557 
// Push a GC-thing pointer immediate and update framePushed_.
void MacroAssembler::Push(const ImmGCPtr ptr) {
 push(ptr);
 adjustFrame(sizeof(intptr_t));
}
   1562 
// Push a floating-point register and update framePushed_.
void MacroAssembler::Push(FloatRegister f) {
 push(f);
 // See MacroAssemblerCompat::push(FloatRegister) for why we use
 // sizeof(double).
 adjustFrame(sizeof(double));
}
   1569 
// Reserve a Value-sized slot and box the double in |reg| directly into it,
// then record the stack growth in framePushed_.
void MacroAssembler::PushBoxed(FloatRegister reg) {
 subFromStackPtr(Imm32(sizeof(double)));
 boxDouble(reg, Address(getStackPointer(), 0));
 adjustFrame(sizeof(double));
}
   1575 
// Pop a GPR and shrink the tracked frame by one 8-byte word.
void MacroAssembler::Pop(Register reg) {
 pop(reg);
 adjustFrame(-1 * int64_t(sizeof(int64_t)));
}
   1580 
// Pop a double from the top of the stack into |f| and free its slot.
void MacroAssembler::Pop(FloatRegister f) {
 loadDouble(Address(getStackPointer(), 0), f);
 // See MacroAssemblerCompat::pop(FloatRegister) for why we use
 // sizeof(double).
 freeStack(sizeof(double));
}
   1587 
// Pop a boxed Value and shrink the tracked frame by one 8-byte word.
void MacroAssembler::Pop(const ValueOperand& val) {
 pop(val);
 adjustFrame(-1 * int64_t(sizeof(int64_t)));
}
   1592 
// Cut the stack back so exactly |framePushed| bytes remain below the frame
// pointer, keeping the real SP in sync with the pseudo stack pointer.
void MacroAssembler::freeStackTo(uint32_t framePushed) {
 MOZ_ASSERT(framePushed <= framePushed_);
 Sub(GetStackPointer64(), X(FramePointer), Operand(int32_t(framePushed)));
 syncStackPtr();
 framePushed_ = framePushed;
}
   1599 
   1600 // ===============================================================
   1601 // Simple call functions.
   1602 
// Indirect call through |reg|.  Returns the offset just past the BLR, i.e.
// the return address.
CodeOffset MacroAssembler::call(Register reg) {
 // This sync has been observed (and is expected) to be necessary.
 // eg testcase: tests/debug/bug1107525.js
 syncStackPtr();
 Blr(ARMRegister(reg, 64));
 return CodeOffset(currentOffset());
}
   1610 
// Direct call to a label in this assembler's buffer.  Returns the offset
// just past the BL (the return address).
CodeOffset MacroAssembler::call(Label* label) {
 // This sync has been observed (and is expected) to be necessary.
 // eg testcase: tests/basic/testBug504520Harder.js
 syncStackPtr();
 Bl(label);
 return CodeOffset(currentOffset());
}
   1618 
// Call an absolute address: materialize it into a scratch register and BLR.
void MacroAssembler::call(ImmPtr imm) {
 // This sync has been observed (and is expected) to be necessary.
 // eg testcase: asm.js/testTimeout5.js
 syncStackPtr();
 vixl::UseScratchRegisterScope temps(this);
 const Register scratch = temps.AcquireX().asUnsized();
 movePtr(imm, scratch);
 Blr(ARMRegister(scratch, 64));
}
   1628 
   1629 void MacroAssembler::call(ImmWord imm) { call(ImmPtr((void*)imm.value)); }
   1630 
// Call a wasm symbolic address, materialized into a scratch register.
// Returns the offset just past the BLR (the return address).
CodeOffset MacroAssembler::call(wasm::SymbolicAddress imm) {
 vixl::UseScratchRegisterScope temps(this);
 const Register scratch = temps.AcquireX().asUnsized();
 // This sync is believed to be necessary, although no case in jit-test/tests
 // has been observed to cause SP != PSP here.
 syncStackPtr();
 movePtr(imm, scratch);
 Blr(ARMRegister(scratch, 64));
 return CodeOffset(currentOffset());
}
   1641 
// Call a function pointer loaded from memory at |addr|.  Returns the offset
// just past the BLR (the return address).
CodeOffset MacroAssembler::call(const Address& addr) {
 vixl::UseScratchRegisterScope temps(this);
 const Register scratch = temps.AcquireX().asUnsized();
 // This sync has been observed (and is expected) to be necessary.
 // eg testcase: tests/backup-point-bug1315634.js
 syncStackPtr();
 loadPtr(addr, scratch);
 Blr(ARMRegister(scratch, 64));
 return CodeOffset(currentOffset());
}
   1652 
// Call into another JitCode blob.  The target address goes through the
// constant pool, and addPendingJump records it for relocation.
void MacroAssembler::call(JitCode* c) {
 vixl::UseScratchRegisterScope temps(this);
 const ARMRegister scratch64 = temps.AcquireX();
 // This sync has been observed (and is expected) to be necessary.
 // eg testcase: arrays/new-array-undefined-undefined-more-args-2.js
 syncStackPtr();
 BufferOffset off = immPool64(scratch64, uint64_t(c->raw()));
 addPendingJump(off, ImmPtr(c->raw()), RelocationKind::JITCODE);
 blr(scratch64);
}
   1663 
// Emit a BL with a zero displacement whose real target is filled in later
// by patchCall.  Returns the offset just past the BL (the return address).
CodeOffset MacroAssembler::callWithPatch() {
 // This needs to sync.  Wasm goes through this one for intramodule calls.
 //
 // In other cases, wasm goes through masm.wasmCallImport(),
 // masm.wasmCallBuiltinInstanceMethod, masm.wasmCallIndirect, all of which
 // sync.
 //
 // This sync is believed to be necessary, although no case in jit-test/tests
 // has been observed to cause SP != PSP here.
 syncStackPtr();
 bl(0, LabelDoc());
 return CodeOffset(currentOffset());
}
   1677 void MacroAssembler::patchCall(uint32_t callerOffset, uint32_t calleeOffset) {
   1678  Instruction* inst = getInstructionAt(BufferOffset(callerOffset - 4));
   1679  MOZ_ASSERT(inst->IsBL());
   1680  ptrdiff_t relTarget = (int)calleeOffset - ((int)callerOffset - 4);
   1681  ptrdiff_t relTarget00 = relTarget >> 2;
   1682  MOZ_RELEASE_ASSERT((relTarget & 0x3) == 0);
   1683  MOZ_RELEASE_ASSERT(vixl::IsInt26(relTarget00));
   1684  bl(inst, relTarget00);
   1685 }
   1686 
// Emit a patchable far jump: an indirect BR whose 64-bit displacement lives
// in the two placeholder words following the branch and is filled in later
// by patchFarJump.  The returned offset is the address the displacement is
// relative to (the BR instruction itself).
CodeOffset MacroAssembler::farJumpWithPatch() {
 vixl::UseScratchRegisterScope temps(this);
 const ARMRegister scratch = temps.AcquireX();
 const ARMRegister scratch2 = temps.AcquireX();

 AutoForbidPoolsAndNops afp(this,
                            /* max number of instructions in scope = */ 7);

 mozilla::DebugOnly<uint32_t> before = currentOffset();

 align(8);  // At most one nop

 Label branch;
 adr(scratch2, &branch);
 // Load the 64-bit displacement stored just past the BR (the two
 // UINT32_MAX placeholder words emitted below), then add it to the BR's
 // own address to form the target.
 ldr(scratch, vixl::MemOperand(scratch2, 4));
 add(scratch2, scratch2, scratch);
 CodeOffset offs(currentOffset());
 bind(&branch);
 br(scratch2);
 Emit(UINT32_MAX);
 Emit(UINT32_MAX);

 mozilla::DebugOnly<uint32_t> after = currentOffset();

 MOZ_ASSERT_IF(!oom(), after - before == 24 || after - before == 28);

 return offs;
}
   1715 
   1716 void MacroAssembler::patchFarJump(CodeOffset farJump, uint32_t targetOffset) {
   1717  Instruction* inst1 = getInstructionAt(BufferOffset(farJump.offset() + 4));
   1718  Instruction* inst2 = getInstructionAt(BufferOffset(farJump.offset() + 8));
   1719 
   1720  int64_t distance = (int64_t)targetOffset - (int64_t)farJump.offset();
   1721 
   1722  MOZ_ASSERT(inst1->InstructionBits() == UINT32_MAX);
   1723  MOZ_ASSERT(inst2->InstructionBits() == UINT32_MAX);
   1724 
   1725  inst1->SetInstructionBits((uint32_t)distance);
   1726  inst2->SetInstructionBits((uint32_t)(distance >> 32));
   1727 }
   1728 
   1729 void MacroAssembler::patchFarJump(uint8_t* farJump, uint8_t* target) {
   1730  Instruction* inst1 = (Instruction*)(farJump + 4);
   1731  Instruction* inst2 = (Instruction*)(farJump + 8);
   1732 
   1733  int64_t distance = (int64_t)target - (int64_t)farJump;
   1734  MOZ_RELEASE_ASSERT(mozilla::Abs(distance) <=
   1735                     (intptr_t)jit::MaxCodeBytesPerProcess);
   1736 
   1737  MOZ_ASSERT(inst1->InstructionBits() == UINT32_MAX);
   1738  MOZ_ASSERT(inst2->InstructionBits() == UINT32_MAX);
   1739 
   1740  inst1->SetInstructionBits((uint32_t)distance);
   1741  inst2->SetInstructionBits((uint32_t)(distance >> 32));
   1742 }
   1743 
// Emit a single NOP that patchNopToCall can later rewrite into a BL.
// Returns the offset just past the NOP.
CodeOffset MacroAssembler::nopPatchableToCall() {
 AutoForbidPoolsAndNops afp(this,
                            /* max number of instructions in scope = */ 1);
 Nop();
 return CodeOffset(currentOffset());
}
   1750 
   1751 void MacroAssembler::patchNopToCall(uint8_t* call, uint8_t* target) {
   1752  uint8_t* inst = call - 4;
   1753  Instruction* instr = reinterpret_cast<Instruction*>(inst);
   1754  MOZ_ASSERT(instr->IsBL() || instr->IsNOP());
   1755  bl(instr, (target - inst) >> 2);
   1756 }
   1757 
   1758 void MacroAssembler::patchCallToNop(uint8_t* call) {
   1759  uint8_t* inst = call - 4;
   1760  Instruction* instr = reinterpret_cast<Instruction*>(inst);
   1761  MOZ_ASSERT(instr->IsBL() || instr->IsNOP());
   1762  nop(instr);
   1763 }
   1764 
// Emit a MOVZ/MOVK pair with zero immediates; patchMove32 fills in the real
// 32-bit constant later.  Returns the offset of the MOVZ.
CodeOffset MacroAssembler::move32WithPatch(Register dest) {
 AutoForbidPoolsAndNops afp(this,
                            /* max number of instructions in scope = */ 3);
 CodeOffset offs = CodeOffset(currentOffset());
 movz(ARMRegister(dest, 64), 0, 0);
 movk(ARMRegister(dest, 64), 0, 16);
 return offs;
}
   1773 
   1774 void MacroAssembler::patchMove32(CodeOffset offset, Imm32 n) {
   1775  Instruction* i1 = getInstructionAt(BufferOffset(offset.offset()));
   1776  MOZ_ASSERT(i1->IsMovz());
   1777  i1->SetInstructionBits(i1->InstructionBits() | ImmMoveWide(n.value & 0xFFFF));
   1778 
   1779  Instruction* i2 = getInstructionAt(BufferOffset(offset.offset() + 4));
   1780  MOZ_ASSERT(i2->IsMovk());
   1781  i2->SetInstructionBits(i2->InstructionBits() |
   1782                         ImmMoveWide((n.value >> 16) & 0xFFFF));
   1783 }
   1784 
// Push LR.  Asserts that the active stack pointer is not the real SP.
void MacroAssembler::pushReturnAddress() {
 MOZ_RELEASE_ASSERT(!sp.Is(GetStackPointer64()), "Not valid");
 push(lr);
}
   1789 
// Pop LR.  Asserts that the active stack pointer is not the real SP.
void MacroAssembler::popReturnAddress() {
 MOZ_RELEASE_ASSERT(!sp.Is(GetStackPointer64()), "Not valid");
 pop(lr);
}
   1794 
   1795 // ===============================================================
   1796 // ABI function calls.
   1797 
// Prepare a native ABI call from JIT code whose current stack alignment is
// unknown: save LR, remember the entry PSP in |scratch|, align the stack to
// ABIStackAlignment, and stash the old PSP at the new top of stack so
// callWithABIPost can restore it.
void MacroAssembler::setupUnalignedABICall(Register scratch) {
 // Because wasm operates without the need for dynamic alignment of SP, it is
 // implied that this routine should never be called when generating wasm.
 MOZ_ASSERT(!IsCompilingWasm());

 // The following won't work for SP -- needs slightly different logic.
 MOZ_RELEASE_ASSERT(GetStackPointer64().Is(PseudoStackPointer64));

 setupNativeABICall();
 dynamicAlignment_ = true;

 // Mask that rounds down to an ABIStackAlignment boundary.
 int64_t alignment = ~(int64_t(ABIStackAlignment) - 1);
 ARMRegister scratch64(scratch, 64);
 MOZ_ASSERT(!scratch64.Is(PseudoStackPointer64));

 // Always save LR -- Baseline ICs assume that LR isn't modified.
 push(lr);

 // Remember the stack address on entry.  This is reloaded in callWithABIPost
 // below.
 Mov(scratch64, PseudoStackPointer64);

 // Make alignment, including the effective push of the previous sp.
 Sub(PseudoStackPointer64, PseudoStackPointer64, Operand(8));
 And(PseudoStackPointer64, PseudoStackPointer64, Operand(alignment));
 syncStackPtr();

 // Store previous sp to the top of the stack, aligned.  This is also
 // reloaded in callWithABIPost.
 Str(scratch64, MemOperand(PseudoStackPointer64, 0));
}
   1829 
// Reserve and align the outgoing stack-argument area, then emit the pending
// ABI argument moves.  The number of bytes reserved is returned through
// *stackAdjust for callWithABIPost to free afterwards.
void MacroAssembler::callWithABIPre(uint32_t* stackAdjust, bool callFromWasm) {
 // wasm operates without the need for dynamic alignment of SP.
 MOZ_ASSERT(!(dynamicAlignment_ && callFromWasm));

 MOZ_ASSERT(inCall_);
 uint32_t stackForCall = abiArgs_.stackBytesConsumedSoFar();

 // ARM64 *really* wants SP to always be 16-aligned, so ensure this now.
 if (dynamicAlignment_) {
   stackForCall += ComputeByteAlignment(stackForCall, StackAlignment);
 } else {
   // This can happen when we attach out-of-line stubs for rare cases.  For
   // example CodeGenerator::visitWasmTruncateToInt32 adds an out-of-line
   // chunk.
   uint32_t alignmentAtPrologue = callFromWasm ? sizeof(wasm::Frame) : 0;
   stackForCall += ComputeByteAlignment(
       stackForCall + framePushed() + alignmentAtPrologue, ABIStackAlignment);
 }

 *stackAdjust = stackForCall;
 reserveStack(*stackAdjust);
 {
   // Resolve and emit the queued argument moves into the reserved area.
   enoughMemory_ &= moveResolver_.resolve();
   if (!enoughMemory_) {
     return;
   }
   MoveEmitter emitter(*this);
   emitter.emit(moveResolver_);
   emitter.finish();
 }

 assertStackAlignment(ABIStackAlignment);
}
   1863 
// Undo callWithABIPre after the callee returns: resync the PSP from SP,
// free the argument area, and -- for dynamically-aligned calls -- restore
// the pre-call stack pointer and LR saved by setupUnalignedABICall.
void MacroAssembler::callWithABIPost(uint32_t stackAdjust, ABIType result) {
 // Call boundaries communicate stack via SP, so we must resync PSP now.
 initPseudoStackPtr();

 freeStack(stackAdjust);

 if (dynamicAlignment_) {
   // This then-clause makes more sense if you first read
   // setupUnalignedABICall above.
   //
   // Restore the stack pointer from entry.  The stack pointer will have been
   // saved by setupUnalignedABICall.  This is fragile in that it assumes
   // that uses of this routine (callWithABIPost) with `dynamicAlignment_ ==
   // true` are preceded by matching calls to setupUnalignedABICall.  But
   // there's nothing that enforce that mechanically.  If we really want to
   // enforce this, we could add a debug-only CallWithABIState enum to the
   // MacroAssembler and assert that setupUnalignedABICall updates it before
   // we get here, then reset it to its initial state.
   Ldr(GetStackPointer64(), MemOperand(GetStackPointer64(), 0));
   syncStackPtr();

   // Restore LR.  This restores LR to the value stored by
   // setupUnalignedABICall, which should have been called just before
   // callWithABIPre.  This is, per the above comment, also fragile.
   pop(lr);

   // SP may be < PSP now.  That is expected from the behaviour of `pop`.  It
   // is not clear why the following `syncStackPtr` is necessary, but it is:
   // without it, the following test segfaults:
   // tests/backup-point-bug1315634.js
   syncStackPtr();
 }

 // If the ABI's return regs are where ION is expecting them, then
 // no other work needs to be done.

#ifdef DEBUG
 MOZ_ASSERT(inCall_);
 inCall_ = false;
#endif
}
   1905 
// ABI call through the function pointer in |fun|.  The target is copied to
// a scratch register before the argument moves are emitted.
void MacroAssembler::callWithABINoProfiler(Register fun, ABIType result) {
 vixl::UseScratchRegisterScope temps(this);
 const Register scratch = temps.AcquireX().asUnsized();
 movePtr(fun, scratch);

 uint32_t stackAdjust;
 callWithABIPre(&stackAdjust);
 call(scratch);
 callWithABIPost(stackAdjust, result);
}
   1916 
// ABI call through a function pointer loaded from memory at |fun|.  The
// target is loaded into a scratch register before the argument moves run.
void MacroAssembler::callWithABINoProfiler(const Address& fun, ABIType result) {
 vixl::UseScratchRegisterScope temps(this);
 const Register scratch = temps.AcquireX().asUnsized();
 loadPtr(fun, scratch);

 uint32_t stackAdjust;
 callWithABIPre(&stackAdjust);
 call(scratch);
 callWithABIPost(stackAdjust, result);
}
   1927 
   1928 // ===============================================================
   1929 // Jit Frames.
   1930 
// Push the address of the immediately-following instruction as if it were a
// return address, and return that instruction's offset.
uint32_t MacroAssembler::pushFakeReturnAddress(Register scratch) {
 enterNoPool(3);
 Label fakeCallsite;

 Adr(ARMRegister(scratch, 64), &fakeCallsite);
 Push(scratch);
 bind(&fakeCallsite);
 uint32_t pseudoReturnOffset = currentOffset();

 leaveNoPool();
 return pseudoReturnOffset;
}
   1943 
// Push a fake exit frame (frame descriptor, return address, frame pointer)
// for out-of-line code.  Always succeeds.
bool MacroAssemblerCompat::buildOOLFakeExitFrame(void* fakeReturnAddr) {
 asMasm().Push(FrameDescriptor(FrameType::IonJS));
 asMasm().Push(ImmPtr(fakeReturnAddr));
 asMasm().Push(FramePointer);
 return true;
}
   1950 
   1951 // ===============================================================
   1952 // Move instructions
   1953 
   1954 void MacroAssembler::moveValue(const ValueOperand& src,
   1955                               const ValueOperand& dest) {
   1956  if (src == dest) {
   1957    return;
   1958  }
   1959  movePtr(src.valueReg(), dest.valueReg());
   1960 }
   1961 
// Materialize a constant Value.  GC things go through a patchable load and
// get a data relocation so the GC can update the embedded pointer.
void MacroAssembler::moveValue(const Value& src, const ValueOperand& dest) {
 if (!src.isGCThing()) {
   movePtr(ImmWord(src.asRawBits()), dest.valueReg());
   return;
 }

 BufferOffset load =
     movePatchablePtr(ImmPtr(src.bitsAsPunboxPointer()), dest.valueReg());
 writeDataRelocation(src, load);
}
   1972 
   1973 // ===============================================================
   1974 // Branch functions
   1975 
// Mask |ptr| down to its chunk base and load the chunk's store-buffer word
// into |buffer|.
void MacroAssembler::loadStoreBuffer(Register ptr, Register buffer) {
 And(ARMRegister(buffer, 64), ARMRegister(ptr, 64),
     Operand(int32_t(~gc::ChunkMask)));
 loadPtr(Address(buffer, gc::ChunkStoreBufferOffset), buffer);
}
   1981 
// Branch to |label| if |ptr| does (Equal) or does not (NotEqual) point into
// a nursery chunk, i.e. a chunk whose store-buffer word is non-null.
void MacroAssembler::branchPtrInNurseryChunk(Condition cond, Register ptr,
                                            Register temp, Label* label) {
 MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
 MOZ_ASSERT(ptr != temp);
 MOZ_ASSERT(ptr != ScratchReg &&
            ptr != ScratchReg2);  // Both may be used internally.
 MOZ_ASSERT(temp != ScratchReg && temp != ScratchReg2);

 // Compute the chunk base, then test its store-buffer word against null.
 And(ARMRegister(temp, 64), ARMRegister(ptr, 64),
     Operand(int32_t(~gc::ChunkMask)));
 branchPtr(InvertCondition(cond), Address(temp, gc::ChunkStoreBufferOffset),
           ImmWord(0), label);
}
   1995 
// Branch on whether the Value stored at |address| is a nursery cell.
void MacroAssembler::branchValueIsNurseryCell(Condition cond,
                                             const Address& address,
                                             Register temp, Label* label) {
 branchValueIsNurseryCellImpl(cond, address, temp, label);
}
   2001 
// Branch on whether the Value in |value| is a nursery cell.
void MacroAssembler::branchValueIsNurseryCell(Condition cond,
                                             ValueOperand value, Register temp,
                                             Label* label) {
 branchValueIsNurseryCellImpl(cond, value, temp, label);
}
// Shared implementation: a Value is a nursery cell if it is a GC thing and
// the chunk it lives in has a non-null store-buffer word.
template <typename T>
void MacroAssembler::branchValueIsNurseryCellImpl(Condition cond,
                                                 const T& value, Register temp,
                                                 Label* label) {
 MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
 MOZ_ASSERT(temp != ScratchReg &&
            temp != ScratchReg2);  // Both may be used internally.

 Label done;
 // Non-GC-things are never nursery cells: for Equal, skip past the check;
 // for NotEqual, the branch can be taken immediately.
 branchTestGCThing(Assembler::NotEqual, value,
                   cond == Assembler::Equal ? &done : label);

 getGCThingValueChunk(value, temp);
 branchPtr(InvertCondition(cond), Address(temp, gc::ChunkStoreBufferOffset),
           ImmWord(0), label);

 bind(&done);
}
   2025 
// Branch if the boxed value in |lhs| is bit-identical (Equal) or not
// (NotEqual) to the constant |rhs|.
void MacroAssembler::branchTestValue(Condition cond, const ValueOperand& lhs,
                                    const Value& rhs, Label* label) {
 MOZ_ASSERT(cond == Equal || cond == NotEqual);
 MOZ_ASSERT(!rhs.isNaN());

 if (!rhs.isGCThing()) {
   Cmp(ARMRegister(lhs.valueReg(), 64), Operand(rhs.asRawBits()));
 } else {
   // GC pointers go through moveValue so a data relocation is emitted.
   vixl::UseScratchRegisterScope temps(this);
   const ARMRegister scratch64 = temps.AcquireX();
   MOZ_ASSERT(scratch64.asUnsized() != lhs.valueReg());
   moveValue(rhs, ValueOperand(scratch64.asUnsized()));
   Cmp(ARMRegister(lhs.valueReg(), 64), scratch64);
 }
 B(label, cond);
}
   2042 
// Branch on whether |val| holds the canonical boxed NaN, ignoring its sign
// bit.  |temp| receives the sign-masked value bits.
void MacroAssembler::branchTestNaNValue(Condition cond, const ValueOperand& val,
                                       Register temp, Label* label) {
 MOZ_ASSERT(cond == Equal || cond == NotEqual);
 vixl::UseScratchRegisterScope temps(this);
 const ARMRegister scratch64 = temps.AcquireX();
 MOZ_ASSERT(scratch64.asUnsized() != val.valueReg());

 // When testing for NaN, we want to ignore the sign bit.
 And(ARMRegister(temp, 64), ARMRegister(val.valueReg(), 64),
     Operand(~mozilla::FloatingPoint<double>::kSignBit));

 // Compare against a NaN with sign bit 0.
 static_assert(JS::detail::CanonicalizedNaNSignBit == 0);
 moveValue(DoubleValue(JS::GenericNaN()), ValueOperand(scratch64.asUnsized()));
 Cmp(ARMRegister(temp, 64), scratch64);
 B(label, cond);
}
   2060 
   2061 // ========================================================================
   2062 // Memory access primitives.
   2063 template <typename T>
   2064 void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value,
   2065                                       MIRType valueType, const T& dest) {
   2066  MOZ_ASSERT(valueType < MIRType::Value);
   2067 
   2068  if (valueType == MIRType::Double) {
   2069    boxDouble(value.reg().typedReg().fpu(), dest);
   2070    return;
   2071  }
   2072 
   2073  if (value.constant()) {
   2074    storeValue(value.value(), dest);
   2075  } else {
   2076    storeValue(ValueTypeFromMIRType(valueType), value.reg().typedReg().gpr(),
   2077               dest);
   2078  }
   2079 }
   2080 
   2081 template void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value,
   2082                                                MIRType valueType,
   2083                                                const Address& dest);
   2084 template void MacroAssembler::storeUnboxedValue(
   2085    const ConstantOrRegister& value, MIRType valueType,
   2086    const BaseObjectElementIndex& dest);
   2087 
// Forward free-form comments to the underlying assembler.
void MacroAssembler::comment(const char* msg) { Assembler::comment(msg); }
   2089 
   2090 // ========================================================================
   2091 // wasm support
   2092 
// Emit the single instruction used to signal a wasm trap, returning its
// offset so trap-site metadata can reference it.
FaultingCodeOffset MacroAssembler::wasmTrapInstruction() {
 AutoForbidPoolsAndNops afp(this,
                            /* max number of instructions in scope = */ 1);
 FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
 Unreachable();
 return fco;
}
   2100 
// 32-bit wasm bounds check against a limit register.  With Spectre index
// masking enabled, also zero the index on the failing condition so a
// mispredicted fall-through cannot use an out-of-bounds index.
void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index,
                                      Register boundsCheckLimit,
                                      Label* label) {
 branch32(cond, index, boundsCheckLimit, label);
 if (JitOptions.spectreIndexMasking) {
   csel(ARMRegister(index, 32), vixl::wzr, ARMRegister(index, 32), cond);
 }
}
   2109 
// 32-bit wasm bounds check against a limit loaded from memory, with
// optional Spectre index masking on the failing condition.
void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index,
                                      Address boundsCheckLimit, Label* label) {
 branch32(cond, index, boundsCheckLimit, label);
 if (JitOptions.spectreIndexMasking) {
   csel(ARMRegister(index, 32), vixl::wzr, ARMRegister(index, 32), cond);
 }
}
   2117 
// 64-bit wasm bounds check against a limit register, with optional Spectre
// index masking (zero the index on the failing condition).
void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index,
                                      Register64 boundsCheckLimit,
                                      Label* label) {
 branchPtr(cond, index.reg, boundsCheckLimit.reg, label);
 if (JitOptions.spectreIndexMasking) {
   csel(ARMRegister(index.reg, 64), vixl::xzr, ARMRegister(index.reg, 64),
        cond);
 }
}
   2127 
// 64-bit wasm bounds check against a limit loaded from memory.  The compare
// operands are swapped, so the branch condition is swapped to match; the
// Spectre mask still uses the caller's condition.
void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index,
                                      Address boundsCheckLimit, Label* label) {
 branchPtr(SwapCmpOperandsCondition(cond), boundsCheckLimit, index.reg, label);
 if (JitOptions.spectreIndexMasking) {
   csel(ARMRegister(index.reg, 64), vixl::xzr, ARMRegister(index.reg, 64),
        cond);
 }
}
   2136 
   2137 // FCVTZU behaves as follows:
   2138 //
   2139 // on NaN it produces zero
   2140 // on too large it produces UINT_MAX (for appropriate type)
   2141 // on too small it produces zero
   2142 //
   2143 // FCVTZS behaves as follows:
   2144 //
   2145 // on NaN it produces zero
   2146 // on too large it produces INT_MAX (for appropriate type)
   2147 // on too small it produces INT_MIN (ditto)
   2148 
// Truncate a double to uint32.  FCVTZU saturates (NaN and too-small -> 0,
// too-large -> UINT32_MAX); in the non-saturating case a result of 0 or
// UINT32_MAX may therefore mean an out-of-range input, so branch to the
// out-of-line path to disambiguate.
void MacroAssembler::wasmTruncateDoubleToUInt32(FloatRegister input_,
                                               Register output_,
                                               bool isSaturating,
                                               Label* oolEntry) {
 ARMRegister output(output_, 32);
 ARMFPRegister input(input_, 64);
 Fcvtzu(output, input);
 if (!isSaturating) {
   // Equal is set if output == 0 or output == UINT32_MAX (-1).
   Cmp(output, 0);
   Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual);
   B(oolEntry, Assembler::Equal);
 }
}
   2162 
// Truncate a float32 to uint32.  FCVTZU saturates, so for the
// non-saturating case a result of 0 or UINT32_MAX may mean an out-of-range
// input and sends us to the out-of-line path.
void MacroAssembler::wasmTruncateFloat32ToUInt32(FloatRegister input_,
                                                Register output_,
                                                bool isSaturating,
                                                Label* oolEntry) {
 ARMRegister output(output_, 32);
 ARMFPRegister input(input_, 32);
 Fcvtzu(output, input);
 if (!isSaturating) {
   // Equal is set if output == 0 or output == UINT32_MAX (-1).
   Cmp(output, 0);
   Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual);
   B(oolEntry, Assembler::Equal);
 }
}
   2176 
// Truncate a double to int32.  FCVTZS saturates (NaN -> 0, too-large ->
// INT32_MAX, too-small -> INT32_MIN); in the non-saturating case any of
// those sentinel results may mean an out-of-range input, so branch to the
// out-of-line path to disambiguate.
void MacroAssembler::wasmTruncateDoubleToInt32(FloatRegister input_,
                                              Register output_,
                                              bool isSaturating,
                                              Label* oolEntry) {
 ARMRegister output(output_, 32);
 ARMFPRegister input(input_, 64);
 Fcvtzs(output, input);
 if (!isSaturating) {
   // Equal is set if output is 0, INT32_MAX or INT32_MIN.
   Cmp(output, 0);
   Ccmp(output, INT32_MAX, vixl::ZFlag, Assembler::NotEqual);
   Ccmp(output, INT32_MIN, vixl::ZFlag, Assembler::NotEqual);
   B(oolEntry, Assembler::Equal);
 }
}
   2191 
// Truncate a float32 to int32.  FCVTZS saturates, so for the non-saturating
// case a result of 0, INT32_MAX or INT32_MIN may mean an out-of-range input
// and sends us to the out-of-line path.
void MacroAssembler::wasmTruncateFloat32ToInt32(FloatRegister input_,
                                               Register output_,
                                               bool isSaturating,
                                               Label* oolEntry) {
 ARMRegister output(output_, 32);
 ARMFPRegister input(input_, 32);
 Fcvtzs(output, input);
 if (!isSaturating) {
   // Equal is set if output is 0, INT32_MAX or INT32_MIN.
   Cmp(output, 0);
   Ccmp(output, INT32_MAX, vixl::ZFlag, Assembler::NotEqual);
   Ccmp(output, INT32_MIN, vixl::ZFlag, Assembler::NotEqual);
   B(oolEntry, Assembler::Equal);
 }
}
   2206 
// Truncate a double to uint64.  FCVTZU saturates, so in the non-saturating
// case a result of 0 or UINT64_MAX sends us to the out-of-line path, which
// rejoins at |oolRejoin|.  The temp register is unused on ARM64.
void MacroAssembler::wasmTruncateDoubleToUInt64(
   FloatRegister input_, Register64 output_, bool isSaturating,
   Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) {
 MOZ_ASSERT(tempDouble.isInvalid());

 ARMRegister output(output_.reg, 64);
 ARMFPRegister input(input_, 64);
 Fcvtzu(output, input);
 if (!isSaturating) {
   // Equal is set if output == 0 or output == UINT64_MAX (-1).
   Cmp(output, 0);
   Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual);
   B(oolEntry, Assembler::Equal);
   bind(oolRejoin);
 }
}
   2222 
// Truncate a float32 to uint64.  FCVTZU saturates, so in the non-saturating
// case a result of 0 or UINT64_MAX sends us to the out-of-line path, which
// rejoins at |oolRejoin|.  The temp register is unused on ARM64.
void MacroAssembler::wasmTruncateFloat32ToUInt64(
   FloatRegister input_, Register64 output_, bool isSaturating,
   Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) {
 MOZ_ASSERT(tempDouble.isInvalid());

 ARMRegister output(output_.reg, 64);
 ARMFPRegister input(input_, 32);
 Fcvtzu(output, input);
 if (!isSaturating) {
   // Equal is set if output == 0 or output == UINT64_MAX (-1).
   Cmp(output, 0);
   Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual);
   B(oolEntry, Assembler::Equal);
   bind(oolRejoin);
 }
}
   2238 
// Truncate a double to int64.  FCVTZS saturates (NaN -> 0, too-large ->
// INT64_MAX, too-small -> INT64_MIN); in the non-saturating case any of
// those sentinel results sends us to the out-of-line path, which rejoins at
// |oolRejoin|.  The temp register is unused on ARM64.
void MacroAssembler::wasmTruncateDoubleToInt64(
   FloatRegister input_, Register64 output_, bool isSaturating,
   Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) {
 MOZ_ASSERT(tempDouble.isInvalid());

 ARMRegister output(output_.reg, 64);
 ARMFPRegister input(input_, 64);
 Fcvtzs(output, input);
 if (!isSaturating) {
   // Equal is set if output is 0, INT64_MAX or INT64_MIN.
   Cmp(output, 0);
   Ccmp(output, INT64_MAX, vixl::ZFlag, Assembler::NotEqual);
   Ccmp(output, INT64_MIN, vixl::ZFlag, Assembler::NotEqual);
   B(oolEntry, Assembler::Equal);
   bind(oolRejoin);
 }
}
   2255 
   2256 void MacroAssembler::wasmTruncateFloat32ToInt64(
   2257    FloatRegister input_, Register64 output_, bool isSaturating,
   2258    Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) {
   2259  ARMRegister output(output_.reg, 64);
   2260  ARMFPRegister input(input_, 32);
   2261  Fcvtzs(output, input);
   2262  if (!isSaturating) {
   2263    Cmp(output, 0);
   2264    Ccmp(output, INT64_MAX, vixl::ZFlag, Assembler::NotEqual);
   2265    Ccmp(output, INT64_MIN, vixl::ZFlag, Assembler::NotEqual);
   2266    B(oolEntry, Assembler::Equal);
   2267    bind(oolRejoin);
   2268  }
   2269 }
   2270 
// Out-of-line check for float32 -> int32 truncation: trap on NaN, trap on
// overflow, and rejoin for in-range inputs that merely produced a sentinel
// result in the inline path.
void MacroAssembler::oolWasmTruncateCheckF32ToI32(
   FloatRegister input, Register output, TruncFlags flags,
   const wasm::TrapSiteDesc& trapSiteDesc, Label* rejoin) {
 Label notNaN;
 branchFloat(Assembler::DoubleOrdered, input, input, &notNaN);
 wasmTrap(wasm::Trap::InvalidConversionToInteger, trapSiteDesc);
 bind(&notNaN);

 Label isOverflow;
 const float two_31 = -float(INT32_MIN);
 ScratchFloat32Scope fpscratch(*this);
 if (flags & TRUNC_UNSIGNED) {
   // Unsigned: in range iff -1 < input < 2^32.
   loadConstantFloat32(two_31 * 2, fpscratch);
   branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
               &isOverflow);
   loadConstantFloat32(-1.0f, fpscratch);
   branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
 } else {
   // Signed: in range iff -2^31 <= input < 2^31.
   loadConstantFloat32(two_31, fpscratch);
   branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
               &isOverflow);
   loadConstantFloat32(-two_31, fpscratch);
   branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin);
 }
 bind(&isOverflow);
 wasmTrap(wasm::Trap::IntegerOverflow, trapSiteDesc);
}
   2298 
// Out-of-line check for double -> int32 truncation: trap on NaN, trap on
// overflow, and rejoin for in-range inputs that merely produced a sentinel
// result in the inline path.
void MacroAssembler::oolWasmTruncateCheckF64ToI32(
   FloatRegister input, Register output, TruncFlags flags,
   const wasm::TrapSiteDesc& trapSiteDesc, Label* rejoin) {
 Label notNaN;
 branchDouble(Assembler::DoubleOrdered, input, input, &notNaN);
 wasmTrap(wasm::Trap::InvalidConversionToInteger, trapSiteDesc);
 bind(&notNaN);

 Label isOverflow;
 const double two_31 = -double(INT32_MIN);
 ScratchDoubleScope fpscratch(*this);
 if (flags & TRUNC_UNSIGNED) {
   // Unsigned: in range iff -1 < input < 2^32.
   loadConstantDouble(two_31 * 2, fpscratch);
   branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
                &isOverflow);
   loadConstantDouble(-1.0, fpscratch);
   branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
 } else {
   // Signed: in range iff -2^31 <= input < 2^31.  A double represents
   // -2^31 - 1 exactly, so the lower bound can use a strict comparison.
   loadConstantDouble(two_31, fpscratch);
   branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
                &isOverflow);
   loadConstantDouble(-two_31 - 1, fpscratch);
   branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
 }
 bind(&isOverflow);
 wasmTrap(wasm::Trap::IntegerOverflow, trapSiteDesc);
}
   2326 
// Out-of-line check taken when the inline wasm f32->i64 truncation failed:
// trap on NaN or real overflow, or jump back to |rejoin| when the input is
// actually representable.
void MacroAssembler::oolWasmTruncateCheckF32ToI64(
    FloatRegister input, Register64 output, TruncFlags flags,
    const wasm::TrapSiteDesc& trapSiteDesc, Label* rejoin) {
  // An unordered self-comparison fails only for NaN.
  Label notNaN;
  branchFloat(Assembler::DoubleOrdered, input, input, &notNaN);
  wasmTrap(wasm::Trap::InvalidConversionToInteger, trapSiteDesc);
  bind(&notNaN);

  Label isOverflow;
  // 2^63, exactly representable as a float.
  const float two_63 = -float(INT64_MIN);
  ScratchFloat32Scope fpscratch(*this);
  if (flags & TRUNC_UNSIGNED) {
    // Unsigned target range is [0, 2^64): trap when input >= 2^64; rejoin
    // when input > -1.
    loadConstantFloat32(two_63 * 2, fpscratch);
    branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
                &isOverflow);
    loadConstantFloat32(-1.0f, fpscratch);
    branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
  } else {
    // Signed target range is [-2^63, 2^63): trap when input >= 2^63; rejoin
    // when input >= -2^63 (which is an exact float, so >= is correct here).
    loadConstantFloat32(two_63, fpscratch);
    branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
                &isOverflow);
    loadConstantFloat32(-two_63, fpscratch);
    branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin);
  }
  bind(&isOverflow);
  // Out of range but not NaN: integer overflow.
  wasmTrap(wasm::Trap::IntegerOverflow, trapSiteDesc);
}
   2354 
// Out-of-line check taken when the inline wasm f64->i64 truncation failed:
// trap on NaN or real overflow, or jump back to |rejoin| when the input is
// actually representable.
void MacroAssembler::oolWasmTruncateCheckF64ToI64(
    FloatRegister input, Register64 output, TruncFlags flags,
    const wasm::TrapSiteDesc& trapSiteDesc, Label* rejoin) {
  // An unordered self-comparison fails only for NaN.
  Label notNaN;
  branchDouble(Assembler::DoubleOrdered, input, input, &notNaN);
  wasmTrap(wasm::Trap::InvalidConversionToInteger, trapSiteDesc);
  bind(&notNaN);

  Label isOverflow;
  // 2^63, exactly representable as a double.
  const double two_63 = -double(INT64_MIN);
  ScratchDoubleScope fpscratch(*this);
  if (flags & TRUNC_UNSIGNED) {
    // Unsigned target range is [0, 2^64): trap when input >= 2^64; rejoin
    // when input > -1.
    loadConstantDouble(two_63 * 2, fpscratch);
    branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
                 &isOverflow);
    loadConstantDouble(-1.0, fpscratch);
    branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
  } else {
    // Signed target range is [-2^63, 2^63): trap when input >= 2^63; rejoin
    // when input >= -2^63 (which is an exact double, so >= is correct here).
    loadConstantDouble(two_63, fpscratch);
    branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
                 &isOverflow);
    loadConstantDouble(-two_63, fpscratch);
    branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin);
  }
  bind(&isOverflow);
  // Out of range but not NaN: integer overflow.
  wasmTrap(wasm::Trap::IntegerOverflow, trapSiteDesc);
}
   2382 
// Wasm memory load into a non-64-bit-integer register; delegates to the
// shared wasmLoadImpl with an invalid 64-bit output.
void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access,
                              Register memoryBase, Register ptr,
                              AnyRegister output) {
  wasmLoadImpl(access, memoryBase, ptr, output, Register64::Invalid());
}
   2388 
// Wasm memory load into a 64-bit integer register; delegates to the shared
// wasmLoadImpl with an empty AnyRegister output.
void MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access,
                                 Register memoryBase, Register ptr,
                                 Register64 output) {
  wasmLoadImpl(access, memoryBase, ptr, AnyRegister(), output);
}
   2394 
// Wasm memory store of a non-64-bit-integer value; delegates to the shared
// wasmStoreImpl with an invalid 64-bit value.
void MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access,
                               AnyRegister value, Register memoryBase,
                               Register ptr) {
  wasmStoreImpl(access, value, Register64::Invalid(), memoryBase, ptr);
}
   2400 
// Wasm memory store of a 64-bit integer value; delegates to the shared
// wasmStoreImpl with an empty AnyRegister value.
void MacroAssembler::wasmStoreI64(const wasm::MemoryAccessDesc& access,
                                  Register64 value, Register memoryBase,
                                  Register ptr) {
  wasmStoreImpl(access, AnyRegister(), value, memoryBase, ptr);
}
   2406 
// Push a fake exit frame of the given |type| for wasm stub code and link it
// into the JitActivation.  |scratch| is clobbered.  On exit, PSP is
// resynchronized with the native SP.
void MacroAssembler::enterFakeExitFrameForWasm(Register cxreg, Register scratch,
                                               ExitFrameType type) {
  // Wasm stubs use the native SP, not the PSP.

  linkExitFrame(cxreg, scratch);

  MOZ_RELEASE_ASSERT(sp.Is(GetStackPointer64()));

  // SP has to be 16-byte aligned when we do a load/store, so push |type| twice
  // and then add 8 bytes to SP. This leaves SP unaligned.
  move32(Imm32(int32_t(type)), scratch);
  push(scratch, scratch);
  Add(sp, sp, 8);

  // Despite the above assertion, it is possible for control to flow from here
  // to the code generated by
  // MacroAssemblerCompat::handleFailureWithHandlerTail without any
  // intervening assignment to PSP.  But handleFailureWithHandlerTail assumes
  // that PSP is the active stack pointer.  Hence the following is necessary
  // for safety.  Note we can't use initPseudoStackPtr here as that would
  // generate no instructions.
  Mov(PseudoStackPointer64, sp);
}
   2430 
// Zero-extend the low 32 bits of |r| into the full 64-bit register, in place.
void MacroAssembler::widenInt32(Register r) {
  move32To64ZeroExtend(r, Register64(r));
}
   2434 
// Emit "*address -= <imm>; branch to |label| if the result is negative",
// where <imm> is a placeholder to be filled in later by
// patchSub32FromMemAndBranchIfNegative.  Returns the CodeOffset just past the
// instruction to patch.  Until patched, the code *adds* 128 instead.
CodeOffset MacroAssembler::sub32FromMemAndBranchIfNegativeWithPatch(
    Address address, Label* label) {
  vixl::UseScratchRegisterScope temps(this);
  const ARMRegister value32 = temps.AcquireW();
  MOZ_ASSERT(value32.asUnsized() != address.base);
  Ldr(value32, toMemOperand(address));
  // -128 is arbitrary, but makes `*address` count upwards, which may help
  // to identify cases where the subsequent ::patch..() call was forgotten.
  // Subtracting -128 encodes as ADDS #128, which the patcher later rewrites
  // into SUBS #imm.
  Subs(value32, value32, Operand(-128));
  // Points immediately after the insn to patch
  CodeOffset patchPoint = CodeOffset(currentOffset());
  // This assumes that Str does not change the condition codes.
  Str(value32, toMemOperand(address));
  B(label, Assembler::Signed);
  return patchPoint;
}
   2451 
// Rewrite the placeholder ADDS #128 emitted by
// sub32FromMemAndBranchIfNegativeWithPatch (located 4 bytes before |offset|)
// into SUBS #imm, keeping the original register fields.
void MacroAssembler::patchSub32FromMemAndBranchIfNegative(CodeOffset offset,
                                                          Imm32 imm) {
  int32_t val = imm.value;
  // Patching it to zero would make the insn pointless
  MOZ_RELEASE_ASSERT(val >= 1 && val <= 127);
  Instruction* instrPtr = getInstructionAt(BufferOffset(offset.offset() - 4));
  // 31   27   23 21    9  4
  // |    |    |  |     |  |
  // 0011 0001 00 imm12 Rn Rd = ADDS Wd, Wn|WSP, #imm12 // (expected)
  // 0111 0001 00 imm12 Rn Rd = SUBS Wd, Wn|WSP, #imm12 // (replacement)
  vixl::Instr oldInstr = instrPtr->InstructionBits();
  // Check opcode bits and imm field are as expected
  MOZ_ASSERT((oldInstr & 0b1111'1111'11'000000000000'00000'00000U) ==
             0b0011'0001'00'000000000000'00000'00000U);
  MOZ_RELEASE_ASSERT((oldInstr & 0b0000'0000'00'111111111111'00000'00000U) ==
                     (128 << 10));  // 128 as created above
  vixl::Instr newInstr =
      0b0111'0001'00'000000000000'00000'00000U |  // opcode bits
      (oldInstr & 0b11111'11111) |                // existing register fields
      ((val & 0b111111111111) << 10);             // #val
  instrPtr->SetInstructionBits(newInstr);
}
   2474 
   2475 // ========================================================================
   2476 // Convert floating point.
   2477 
// ARM64's ucvtf converts a full 64-bit unsigned integer in one instruction,
// so uint64 -> double conversion never needs a temp register here.
bool MacroAssembler::convertUInt64ToDoubleNeedsTemp() { return false; }
   2479 
// uint64 -> double via a single UCVTF; |temp| must be invalid (see
// convertUInt64ToDoubleNeedsTemp).
void MacroAssembler::convertUInt64ToDouble(Register64 src, FloatRegister dest,
                                           Register temp) {
  MOZ_ASSERT(temp == Register::Invalid());
  Ucvtf(ARMFPRegister(dest, 64), ARMRegister(src.reg, 64));
}
   2485 
// int64 -> double via a single signed-convert (SCVTF).
void MacroAssembler::convertInt64ToDouble(Register64 src, FloatRegister dest) {
  Scvtf(ARMFPRegister(dest, 64), ARMRegister(src.reg, 64));
}
   2489 
// uint64 -> float32 via a single UCVTF; |temp| must be invalid.
void MacroAssembler::convertUInt64ToFloat32(Register64 src, FloatRegister dest,
                                            Register temp) {
  MOZ_ASSERT(temp == Register::Invalid());
  Ucvtf(ARMFPRegister(dest, 32), ARMRegister(src.reg, 64));
}
   2495 
// int64 -> float32 via a single signed-convert (SCVTF).
void MacroAssembler::convertInt64ToFloat32(Register64 src, FloatRegister dest) {
  Scvtf(ARMFPRegister(dest, 32), ARMRegister(src.reg, 64));
}
   2499 
// intptr_t is 64-bit on this target, so delegate to the int64 conversion.
void MacroAssembler::convertIntPtrToDouble(Register src, FloatRegister dest) {
  convertInt64ToDouble(Register64(src), dest);
}
   2503 
   2504 // ========================================================================
   2505 // Primitive atomic operations.
   2506 
   2507 // The computed MemOperand must be Reg+0 because the load/store exclusive
   2508 // instructions only take a single pointer register.
   2509 
   2510 static MemOperand ComputePointerForAtomic(MacroAssembler& masm,
   2511                                          const Address& address,
   2512                                          Register scratch) {
   2513  if (address.offset == 0) {
   2514    return MemOperand(X(masm, address.base), 0);
   2515  }
   2516 
   2517  masm.Add(X(scratch), X(masm, address.base), address.offset);
   2518  return MemOperand(X(scratch), 0);
   2519 }
   2520 
// Materialize a BaseIndex address (base + (index << scale) + offset) as a
// reg+0 MemOperand for the exclusive/atomic instructions.  |scratch| is
// always clobbered.
static MemOperand ComputePointerForAtomic(MacroAssembler& masm,
                                          const BaseIndex& address,
                                          Register scratch) {
  masm.Add(X(scratch), X(masm, address.base),
           Operand(X(address.index), vixl::LSL, address.scale));
  if (address.offset) {
    masm.Add(X(scratch), X(scratch), address.offset);
  }
  return MemOperand(X(scratch), 0);
}
   2531 
   2532 // This sign extends to targetWidth and leaves any higher bits zero.
   2533 
   2534 static void SignOrZeroExtend(MacroAssembler& masm, Scalar::Type srcType,
   2535                             Width targetWidth, Register src, Register dest) {
   2536  bool signExtend = Scalar::isSignedIntType(srcType);
   2537 
   2538  switch (Scalar::byteSize(srcType)) {
   2539    case 1:
   2540      if (signExtend) {
   2541        masm.Sbfm(R(dest, targetWidth), R(src, targetWidth), 0, 7);
   2542      } else {
   2543        masm.Ubfm(R(dest, targetWidth), R(src, targetWidth), 0, 7);
   2544      }
   2545      break;
   2546    case 2:
   2547      if (signExtend) {
   2548        masm.Sbfm(R(dest, targetWidth), R(src, targetWidth), 0, 15);
   2549      } else {
   2550        masm.Ubfm(R(dest, targetWidth), R(src, targetWidth), 0, 15);
   2551      }
   2552      break;
   2553    case 4:
   2554      if (targetWidth == Width::_64) {
   2555        if (signExtend) {
   2556          masm.Sbfm(X(dest), X(src), 0, 31);
   2557        } else {
   2558          masm.Ubfm(X(dest), X(src), 0, 31);
   2559        }
   2560      } else if (src != dest) {
   2561        masm.Mov(R(dest, targetWidth), R(src, targetWidth));
   2562      }
   2563      break;
   2564    case 8:
   2565      if (src != dest) {
   2566        masm.Mov(R(dest, targetWidth), R(src, targetWidth));
   2567      }
   2568      break;
   2569    default:
   2570      MOZ_CRASH();
   2571  }
   2572 }
   2573 
   2574 // Exclusive-loads zero-extend their values to the full width of the X register.
   2575 //
   2576 // Note, we've promised to leave the high bits of the 64-bit register clear if
   2577 // the targetWidth is 32.
   2578 
// Emit an exclusive load (LDXR/LDXRH/LDXRB) of |srcType| from |ptr| into
// |dest|, then sign-extend to |targetWidth| if the type is signed.  When
// |access| is non-null, wasm trap metadata is recorded at the exact address
// of the load instruction.
static void LoadExclusive(MacroAssembler& masm,
                          const wasm::MemoryAccessDesc* access,
                          Scalar::Type srcType, Width targetWidth,
                          MemOperand ptr, Register dest) {
  bool signExtend = Scalar::isSignedIntType(srcType);

  // With this address form, a single native ldxr* will be emitted, and the
  // AutoForbidPoolsAndNops ensures that the metadata is emitted at the
  // address of the ldxr*.  Note that the use of AutoForbidPoolsAndNops is now
  // a "second class" solution; the right way to do this would be to have the
  // masm.<LoadInsn> calls produce an FaultingCodeOffset, and hand that value to
  // `masm.append`.
  MOZ_ASSERT(ptr.IsImmediateOffset() && ptr.offset() == 0);

  switch (Scalar::byteSize(srcType)) {
    case 1: {
      {
        AutoForbidPoolsAndNops afp(
            &masm,
            /* max number of instructions in scope = */ 1);
        if (access) {
          masm.append(*access, wasm::TrapMachineInsn::Load8,
                      FaultingCodeOffset(masm.currentOffset()));
        }
        masm.Ldxrb(W(dest), ptr);
      }
      // Ldxrb zero-extends; sign-extend afterwards if required.
      if (signExtend) {
        masm.Sbfm(R(dest, targetWidth), R(dest, targetWidth), 0, 7);
      }
      break;
    }
    case 2: {
      {
        AutoForbidPoolsAndNops afp(
            &masm,
            /* max number of instructions in scope = */ 1);
        if (access) {
          masm.append(*access, wasm::TrapMachineInsn::Load16,
                      FaultingCodeOffset(masm.currentOffset()));
        }
        masm.Ldxrh(W(dest), ptr);
      }
      // Ldxrh zero-extends; sign-extend afterwards if required.
      if (signExtend) {
        masm.Sbfm(R(dest, targetWidth), R(dest, targetWidth), 0, 15);
      }
      break;
    }
    case 4: {
      {
        AutoForbidPoolsAndNops afp(
            &masm,
            /* max number of instructions in scope = */ 1);
        if (access) {
          masm.append(*access, wasm::TrapMachineInsn::Load32,
                      FaultingCodeOffset(masm.currentOffset()));
        }
        masm.Ldxr(W(dest), ptr);
      }
      // Only a 64-bit target needs explicit sign extension of the 32 bits.
      if (targetWidth == Width::_64 && signExtend) {
        masm.Sbfm(X(dest), X(dest), 0, 31);
      }
      break;
    }
    case 8: {
      {
        AutoForbidPoolsAndNops afp(
            &masm,
            /* max number of instructions in scope = */ 1);
        if (access) {
          masm.append(*access, wasm::TrapMachineInsn::Load64,
                      FaultingCodeOffset(masm.currentOffset()));
        }
        masm.Ldxr(X(dest), ptr);
      }
      break;
    }
    default: {
      MOZ_CRASH();
    }
  }
}
   2660 
// Emit an exclusive store (STXR/STXRH/STXRB) of |src| to |ptr|; |status|
// receives 0 on success, 1 if the exclusive monitor was lost.  byteSize()
// only yields 1/2/4/8, so no default case is needed.
static void StoreExclusive(MacroAssembler& masm, Scalar::Type type,
                           Register status, Register src, MemOperand ptr) {
  // Note, these are not decorated with a TrapSite only because they are
  // assumed to be preceded by a LoadExclusive to the same address, of the
  // same width, so that will always take the page fault if the address is bad.
  switch (Scalar::byteSize(type)) {
    case 1:
      masm.Stxrb(W(status), W(src), ptr);
      break;
    case 2:
      masm.Stxrh(W(status), W(src), ptr);
      break;
    case 4:
      masm.Stxr(W(status), W(src), ptr);
      break;
    case 8:
      masm.Stxr(W(status), X(src), ptr);
      break;
  }
}
   2681 
// Whether the CPU advertises the ARMv8.1 LSE atomic instructions
// (CAS/SWP/LD<op> family).
static bool HasAtomicInstructions(MacroAssembler& masm) {
  return masm.asVIXL().GetCPUFeatures()->Has(vixl::CPUFeatures::kAtomics);
}
   2685 
   2686 static inline bool SupportedAtomicInstructionOperands(Scalar::Type type,
   2687                                                      Width targetWidth) {
   2688  if (targetWidth == Width::_32) {
   2689    return byteSize(type) <= 4;
   2690  }
   2691  if (targetWidth == Width::_64) {
   2692    return byteSize(type) == 8;
   2693  }
   2694  return false;
   2695 }
   2696 
// Emit a compare-and-swap at |mem|: if memory holds |oldval| it is replaced
// with |newval|; either way the previous memory value ends up in |output|.
// Uses a single CASAL* when the CPU has LSE atomics, otherwise an
// LDXR/STXR retry loop.  |access| (if non-null) marks the faulting
// instruction for wasm trap metadata.
template <typename T>
static void CompareExchange(MacroAssembler& masm,
                            const wasm::MemoryAccessDesc* access,
                            Scalar::Type type, Width targetWidth,
                            Synchronization sync, const T& mem, Register oldval,
                            Register newval, Register output) {
  MOZ_ASSERT(oldval != output && newval != output);

  vixl::UseScratchRegisterScope temps(&masm);

  Register ptrScratch = temps.AcquireX().asUnsized();
  MemOperand ptr = ComputePointerForAtomic(masm, mem, ptrScratch);

  MOZ_ASSERT(ptr.base().asUnsized() != output);

  if (HasAtomicInstructions(masm) &&
      SupportedAtomicInstructionOperands(type, targetWidth)) {
    // CASAL reads the compare value from, and writes the old memory value
    // to, the same register; seed |output| with |oldval|.
    masm.Mov(X(output), X(oldval));
    // Casal is using same atomic mechanism as Ldxr/Stxr, and
    // consider it is the same for "Inner Shareable" domain.
    // Not updated gen_cmpxchg in GenerateAtomicOperations.py.
    masm.memoryBarrierBefore(sync);
    {
      // Keep the trap metadata glued to the CASAL* instruction itself.
      AutoForbidPoolsAndNops afp(&masm, /* number of insns = */ 1);
      if (access) {
        masm.append(*access, wasm::TrapMachineInsn::Atomic,
                    FaultingCodeOffset(masm.currentOffset()));
      }
      switch (byteSize(type)) {
        case 1:
          masm.Casalb(R(output, targetWidth), R(newval, targetWidth), ptr);
          break;
        case 2:
          masm.Casalh(R(output, targetWidth), R(newval, targetWidth), ptr);
          break;
        case 4:
        case 8:
          masm.Casal(R(output, targetWidth), R(newval, targetWidth), ptr);
          break;
        default:
          MOZ_CRASH("CompareExchange unsupported type");
      }
    }
    masm.memoryBarrierAfter(sync);
    // Callers expect the JS/wasm value semantics of |type| in |output|.
    SignOrZeroExtend(masm, type, targetWidth, output, output);
    return;
  }

  // The target doesn't support atomics, so generate a LL-SC loop. This requires
  // only AArch64 v8.0.
  Label again;
  Label done;

  // NOTE: the generated code must match the assembly code in gen_cmpxchg in
  // GenerateAtomicOperations.py
  masm.memoryBarrierBefore(sync);

  Register scratch = temps.AcquireX().asUnsized();

  masm.bind(&again);
  // Narrow |oldval| to the memory width so the comparison below is exact.
  SignOrZeroExtend(masm, type, targetWidth, oldval, scratch);
  LoadExclusive(masm, access, type, targetWidth, ptr, output);
  masm.Cmp(R(output, targetWidth), R(scratch, targetWidth));
  masm.B(&done, MacroAssembler::NotEqual);
  StoreExclusive(masm, type, scratch, newval, ptr);
  // Retry if the exclusive monitor was lost between load and store.
  masm.Cbnz(W(scratch), &again);
  masm.bind(&done);

  masm.memoryBarrierAfter(sync);
}
   2767 
   2768 template <typename T>
   2769 static void AtomicExchange(MacroAssembler& masm,
   2770                           const wasm::MemoryAccessDesc* access,
   2771                           Scalar::Type type, Width targetWidth,
   2772                           Synchronization sync, const T& mem, Register value,
   2773                           Register output) {
   2774  MOZ_ASSERT(value != output);
   2775 
   2776  vixl::UseScratchRegisterScope temps(&masm);
   2777 
   2778  Register ptrScratch = temps.AcquireX().asUnsized();
   2779  MemOperand ptr = ComputePointerForAtomic(masm, mem, ptrScratch);
   2780 
   2781  if (HasAtomicInstructions(masm) &&
   2782      SupportedAtomicInstructionOperands(type, targetWidth)) {
   2783    // Swpal is using same atomic mechanism as Ldxr/Stxr, and
   2784    // consider it is the same for "Inner Shareable" domain.
   2785    // Not updated gen_exchange in GenerateAtomicOperations.py.
   2786    masm.memoryBarrierBefore(sync);
   2787    {
   2788      AutoForbidPoolsAndNops afp(&masm, /* number of insns = */ 1);
   2789      if (access) {
   2790        masm.append(*access, wasm::TrapMachineInsn::Atomic,
   2791                    FaultingCodeOffset(masm.currentOffset()));
   2792      }
   2793      switch (byteSize(type)) {
   2794        case 1:
   2795          masm.Swpalb(R(value, targetWidth), R(output, targetWidth), ptr);
   2796          break;
   2797        case 2:
   2798          masm.Swpalh(R(value, targetWidth), R(output, targetWidth), ptr);
   2799          break;
   2800        case 4:
   2801        case 8:
   2802          masm.Swpal(R(value, targetWidth), R(output, targetWidth), ptr);
   2803          break;
   2804        default:
   2805          MOZ_CRASH("AtomicExchange unsupported type");
   2806      }
   2807    }
   2808    masm.memoryBarrierAfter(sync);
   2809    SignOrZeroExtend(masm, type, targetWidth, output, output);
   2810    return;
   2811  }
   2812 
   2813  // The target doesn't support atomics, so generate a LL-SC loop. This requires
   2814  // only AArch64 v8.0.
   2815  Label again;
   2816 
   2817  // NOTE: the generated code must match the assembly code in gen_exchange in
   2818  // GenerateAtomicOperations.py
   2819  masm.memoryBarrierBefore(sync);
   2820 
   2821  Register scratch = temps.AcquireX().asUnsized();
   2822 
   2823  masm.bind(&again);
   2824  LoadExclusive(masm, access, type, targetWidth, ptr, output);
   2825  StoreExclusive(masm, type, scratch, value, ptr);
   2826  masm.Cbnz(W(scratch), &again);
   2827 
   2828  masm.memoryBarrierAfter(sync);
   2829 }
   2830 
// Emit a read-modify-write atomic |op| of |value| at |mem|.  When
// |wantResult| is true, the value previously in memory is left in |output|
// (with |type|'s sign/zero extension applied).  Uses the LSE LD<op>/ST<op>
// instructions when available (Sub and And are rewritten in terms of LDADD
// and LDCLR respectively, which is all the hardware provides), otherwise an
// LDXR/<op>/STXR retry loop using |temp| for the updated value.
template <bool wantResult, typename T>
static void AtomicFetchOp(MacroAssembler& masm,
                          const wasm::MemoryAccessDesc* access,
                          Scalar::Type type, Width targetWidth,
                          Synchronization sync, AtomicOp op, const T& mem,
                          Register value, Register temp, Register output) {
  MOZ_ASSERT(value != output);
  MOZ_ASSERT(value != temp);
  MOZ_ASSERT_IF(wantResult, output != temp);

  vixl::UseScratchRegisterScope temps(&masm);

  Register ptrScratch = temps.AcquireX().asUnsized();
  MemOperand ptr = ComputePointerForAtomic(masm, mem, ptrScratch);

  if (HasAtomicInstructions(masm) &&
      SupportedAtomicInstructionOperands(type, targetWidth) &&
      !isFloatingType(type)) {
    // LdXXXal/StXXXl is using same atomic mechanism as Ldxr/Stxr, and
    // consider it is the same for "Inner Shareable" domain.
    // Not updated gen_fetchop in GenerateAtomicOperations.py.
    masm.memoryBarrierBefore(sync);

// Emit one LSE fetch-op of the right width: LD<op>AL* when the old value is
// wanted, otherwise the cheaper ST<op>L* form that has no destination.
#define FETCH_OP_CASE(op, arg)                                                \
  {                                                                           \
    AutoForbidPoolsAndNops afp(&masm, /* num insns = */ 1);                   \
    if (access) {                                                             \
      masm.append(*access, wasm::TrapMachineInsn::Atomic,                     \
                  FaultingCodeOffset(masm.currentOffset()));                  \
    }                                                                         \
    switch (byteSize(type)) {                                                 \
      case 1:                                                                 \
        if (wantResult) {                                                     \
          masm.Ld##op##alb(R(arg, targetWidth), R(output, targetWidth), ptr); \
        } else {                                                              \
          masm.St##op##lb(R(arg, targetWidth), ptr);                          \
        }                                                                     \
        break;                                                                \
      case 2:                                                                 \
        if (wantResult) {                                                     \
          masm.Ld##op##alh(R(arg, targetWidth), R(output, targetWidth), ptr); \
        } else {                                                              \
          masm.St##op##lh(R(arg, targetWidth), ptr);                          \
        }                                                                     \
        break;                                                                \
      case 4:                                                                 \
      case 8:                                                                 \
        if (wantResult) {                                                     \
          masm.Ld##op##al(R(arg, targetWidth), R(output, targetWidth), ptr);  \
        } else {                                                              \
          masm.St##op##l(R(arg, targetWidth), ptr);                           \
        }                                                                     \
        break;                                                                \
      default:                                                                \
        MOZ_CRASH("AtomicFetchOp unsupported type");                          \
    }                                                                         \
  }

    switch (op) {
      case AtomicOp::Add:
        FETCH_OP_CASE(add, value);
        break;
      case AtomicOp::Sub: {
        // No LDSUB exists; add the negation instead.
        Register scratch = temps.AcquireX().asUnsized();
        masm.Neg(X(scratch), X(value));
        FETCH_OP_CASE(add, scratch);
        break;
      }
      case AtomicOp::And: {
        // No LDAND exists; LDCLR clears the set bits of its operand, so
        // "and value" == "clear ~value".
        Register scratch = temps.AcquireX().asUnsized();
        masm.Eor(X(scratch), X(value), Operand(~0));
        FETCH_OP_CASE(clr, scratch);
        break;
      }
      case AtomicOp::Or:
        FETCH_OP_CASE(set, value);
        break;
      case AtomicOp::Xor:
        FETCH_OP_CASE(eor, value);
        break;
    }
    masm.memoryBarrierAfter(sync);
    if (wantResult) {
      // Callers expect the JS/wasm value semantics of |type| in |output|.
      SignOrZeroExtend(masm, type, targetWidth, output, output);
    }
    return;
  }

#undef FETCH_OP_CASE

  // The target doesn't support atomics, so generate a LL-SC loop. This requires
  // only AArch64 v8.0.
  Label again;

  // NOTE: the generated code must match the assembly code in gen_fetchop in
  // GenerateAtomicOperations.py
  masm.memoryBarrierBefore(sync);

  Register scratch = temps.AcquireX().asUnsized();

  masm.bind(&again);
  LoadExclusive(masm, access, type, targetWidth, ptr, output);
  switch (op) {
    case AtomicOp::Add:
      masm.Add(X(temp), X(output), X(value));
      break;
    case AtomicOp::Sub:
      masm.Sub(X(temp), X(output), X(value));
      break;
    case AtomicOp::And:
      masm.And(X(temp), X(output), X(value));
      break;
    case AtomicOp::Or:
      masm.Orr(X(temp), X(output), X(value));
      break;
    case AtomicOp::Xor:
      masm.Eor(X(temp), X(output), X(value));
      break;
  }
  StoreExclusive(masm, type, scratch, temp, ptr);
  // Retry if the exclusive monitor was lost between load and store.
  masm.Cbnz(W(scratch), &again);
  if (wantResult) {
    SignOrZeroExtend(masm, type, targetWidth, output, output);
  }

  masm.memoryBarrierAfter(sync);
}
   2958 
// 32-bit-width compare-exchange for JS atomics (no wasm trap metadata).
void MacroAssembler::compareExchange(Scalar::Type type, Synchronization sync,
                                     const Address& mem, Register oldval,
                                     Register newval, Register output) {
  CompareExchange(*this, nullptr, type, Width::_32, sync, mem, oldval, newval,
                  output);
}
   2965 
// BaseIndex-addressed variant of the 32-bit compare-exchange above.
void MacroAssembler::compareExchange(Scalar::Type type, Synchronization sync,
                                     const BaseIndex& mem, Register oldval,
                                     Register newval, Register output) {
  CompareExchange(*this, nullptr, type, Width::_32, sync, mem, oldval, newval,
                  output);
}
   2972 
// 64-bit compare-exchange (no wasm trap metadata).
void MacroAssembler::compareExchange64(Synchronization sync, const Address& mem,
                                       Register64 expect, Register64 replace,
                                       Register64 output) {
  CompareExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem,
                  expect.reg, replace.reg, output.reg);
}
   2979 
// BaseIndex-addressed variant of the 64-bit compare-exchange above.
void MacroAssembler::compareExchange64(Synchronization sync,
                                       const BaseIndex& mem, Register64 expect,
                                       Register64 replace, Register64 output) {
  CompareExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem,
                  expect.reg, replace.reg, output.reg);
}
   2986 
// 64-bit atomic exchange (no wasm trap metadata).
void MacroAssembler::atomicExchange64(Synchronization sync, const Address& mem,
                                      Register64 value, Register64 output) {
  AtomicExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem,
                 value.reg, output.reg);
}
   2992 
// BaseIndex-addressed variant of the 64-bit atomic exchange above.
void MacroAssembler::atomicExchange64(Synchronization sync,
                                      const BaseIndex& mem, Register64 value,
                                      Register64 output) {
  AtomicExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem,
                 value.reg, output.reg);
}
   2999 
// 64-bit atomic fetch-op returning the old value in |output|.
void MacroAssembler::atomicFetchOp64(Synchronization sync, AtomicOp op,
                                     Register64 value, const Address& mem,
                                     Register64 temp, Register64 output) {
  AtomicFetchOp<true>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem,
                      value.reg, temp.reg, output.reg);
}
   3006 
// BaseIndex-addressed variant of the 64-bit atomic fetch-op above.
void MacroAssembler::atomicFetchOp64(Synchronization sync, AtomicOp op,
                                     Register64 value, const BaseIndex& mem,
                                     Register64 temp, Register64 output) {
  AtomicFetchOp<true>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem,
                      value.reg, temp.reg, output.reg);
}
   3013 
// 64-bit atomic op for effect only: the old value is discarded
// (wantResult=false), so |temp| doubles as the (ignored) output.
void MacroAssembler::atomicEffectOp64(Synchronization sync, AtomicOp op,
                                      Register64 value, const Address& mem,
                                      Register64 temp) {
  AtomicFetchOp<false>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem,
                       value.reg, temp.reg, temp.reg);
}
   3020 
   3021 void MacroAssembler::atomicEffectOp64(Synchronization sync, AtomicOp op,
   3022                                      Register64 value, const BaseIndex& mem,
   3023                                      Register64 temp) {
   3024  AtomicFetchOp<false>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem,
   3025                       value.reg, temp.reg, temp.reg);
   3026 }
   3027 
   3028 void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access,
   3029                                         const Address& mem, Register oldval,
   3030                                         Register newval, Register output) {
   3031  CompareExchange(*this, &access, access.type(), Width::_32, access.sync(), mem,
   3032                  oldval, newval, output);
   3033 }
   3034 
   3035 void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access,
   3036                                         const BaseIndex& mem, Register oldval,
   3037                                         Register newval, Register output) {
   3038  CompareExchange(*this, &access, access.type(), Width::_32, access.sync(), mem,
   3039                  oldval, newval, output);
   3040 }
   3041 
   3042 void MacroAssembler::atomicExchange(Scalar::Type type, Synchronization sync,
   3043                                    const Address& mem, Register value,
   3044                                    Register output) {
   3045  AtomicExchange(*this, nullptr, type, Width::_32, sync, mem, value, output);
   3046 }
   3047 
   3048 void MacroAssembler::atomicExchange(Scalar::Type type, Synchronization sync,
   3049                                    const BaseIndex& mem, Register value,
   3050                                    Register output) {
   3051  AtomicExchange(*this, nullptr, type, Width::_32, sync, mem, value, output);
   3052 }
   3053 
   3054 void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access,
   3055                                        const Address& mem, Register value,
   3056                                        Register output) {
   3057  AtomicExchange(*this, &access, access.type(), Width::_32, access.sync(), mem,
   3058                 value, output);
   3059 }
   3060 
   3061 void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access,
   3062                                        const BaseIndex& mem, Register value,
   3063                                        Register output) {
   3064  AtomicExchange(*this, &access, access.type(), Width::_32, access.sync(), mem,
   3065                 value, output);
   3066 }
   3067 
   3068 void MacroAssembler::atomicFetchOp(Scalar::Type type, Synchronization sync,
   3069                                   AtomicOp op, Register value,
   3070                                   const Address& mem, Register temp,
   3071                                   Register output) {
   3072  AtomicFetchOp<true>(*this, nullptr, type, Width::_32, sync, op, mem, value,
   3073                      temp, output);
   3074 }
   3075 
   3076 void MacroAssembler::atomicFetchOp(Scalar::Type type, Synchronization sync,
   3077                                   AtomicOp op, Register value,
   3078                                   const BaseIndex& mem, Register temp,
   3079                                   Register output) {
   3080  AtomicFetchOp<true>(*this, nullptr, type, Width::_32, sync, op, mem, value,
   3081                      temp, output);
   3082 }
   3083 
   3084 void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access,
   3085                                       AtomicOp op, Register value,
   3086                                       const Address& mem, Register temp,
   3087                                       Register output) {
   3088  AtomicFetchOp<true>(*this, &access, access.type(), Width::_32, access.sync(),
   3089                      op, mem, value, temp, output);
   3090 }
   3091 
   3092 void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access,
   3093                                       AtomicOp op, Register value,
   3094                                       const BaseIndex& mem, Register temp,
   3095                                       Register output) {
   3096  AtomicFetchOp<true>(*this, &access, access.type(), Width::_32, access.sync(),
   3097                      op, mem, value, temp, output);
   3098 }
   3099 
   3100 void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access,
   3101                                        AtomicOp op, Register value,
   3102                                        const Address& mem, Register temp) {
   3103  AtomicFetchOp<false>(*this, &access, access.type(), Width::_32, access.sync(),
   3104                       op, mem, value, temp, temp);
   3105 }
   3106 
   3107 void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access,
   3108                                        AtomicOp op, Register value,
   3109                                        const BaseIndex& mem, Register temp) {
   3110  AtomicFetchOp<false>(*this, &access, access.type(), Width::_32, access.sync(),
   3111                       op, mem, value, temp, temp);
   3112 }
   3113 
   3114 void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access,
   3115                                           const Address& mem,
   3116                                           Register64 expect,
   3117                                           Register64 replace,
   3118                                           Register64 output) {
   3119  CompareExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem,
   3120                  expect.reg, replace.reg, output.reg);
   3121 }
   3122 
   3123 void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access,
   3124                                           const BaseIndex& mem,
   3125                                           Register64 expect,
   3126                                           Register64 replace,
   3127                                           Register64 output) {
   3128  CompareExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem,
   3129                  expect.reg, replace.reg, output.reg);
   3130 }
   3131 
   3132 void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access,
   3133                                          const Address& mem, Register64 value,
   3134                                          Register64 output) {
   3135  AtomicExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem,
   3136                 value.reg, output.reg);
   3137 }
   3138 
   3139 void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access,
   3140                                          const BaseIndex& mem,
   3141                                          Register64 value, Register64 output) {
   3142  AtomicExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem,
   3143                 value.reg, output.reg);
   3144 }
   3145 
   3146 void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access,
   3147                                         AtomicOp op, Register64 value,
   3148                                         const Address& mem, Register64 temp,
   3149                                         Register64 output) {
   3150  AtomicFetchOp<true>(*this, &access, Scalar::Int64, Width::_64, access.sync(),
   3151                      op, mem, value.reg, temp.reg, output.reg);
   3152 }
   3153 
   3154 void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access,
   3155                                         AtomicOp op, Register64 value,
   3156                                         const BaseIndex& mem, Register64 temp,
   3157                                         Register64 output) {
   3158  AtomicFetchOp<true>(*this, &access, Scalar::Int64, Width::_64, access.sync(),
   3159                      op, mem, value.reg, temp.reg, output.reg);
   3160 }
   3161 
   3162 void MacroAssembler::wasmAtomicEffectOp64(const wasm::MemoryAccessDesc& access,
   3163                                          AtomicOp op, Register64 value,
   3164                                          const BaseIndex& mem,
   3165                                          Register64 temp) {
   3166  AtomicFetchOp<false>(*this, &access, Scalar::Int64, Width::_64, access.sync(),
   3167                       op, mem, value.reg, temp.reg, temp.reg);
   3168 }
   3169 
   3170 // ========================================================================
   3171 // JS atomic operations.
   3172 
   3173 template <typename T>
   3174 static void CompareExchangeJS(MacroAssembler& masm, Scalar::Type arrayType,
   3175                              Synchronization sync, const T& mem,
   3176                              Register oldval, Register newval, Register temp,
   3177                              AnyRegister output) {
   3178  if (arrayType == Scalar::Uint32) {
   3179    masm.compareExchange(arrayType, sync, mem, oldval, newval, temp);
   3180    masm.convertUInt32ToDouble(temp, output.fpu());
   3181  } else {
   3182    masm.compareExchange(arrayType, sync, mem, oldval, newval, output.gpr());
   3183  }
   3184 }
   3185 
   3186 void MacroAssembler::compareExchangeJS(Scalar::Type arrayType,
   3187                                       Synchronization sync, const Address& mem,
   3188                                       Register oldval, Register newval,
   3189                                       Register temp, AnyRegister output) {
   3190  CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output);
   3191 }
   3192 
   3193 void MacroAssembler::compareExchangeJS(Scalar::Type arrayType,
   3194                                       Synchronization sync,
   3195                                       const BaseIndex& mem, Register oldval,
   3196                                       Register newval, Register temp,
   3197                                       AnyRegister output) {
   3198  CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output);
   3199 }
   3200 
   3201 template <typename T>
   3202 static void AtomicExchangeJS(MacroAssembler& masm, Scalar::Type arrayType,
   3203                             Synchronization sync, const T& mem, Register value,
   3204                             Register temp, AnyRegister output) {
   3205  if (arrayType == Scalar::Uint32) {
   3206    masm.atomicExchange(arrayType, sync, mem, value, temp);
   3207    masm.convertUInt32ToDouble(temp, output.fpu());
   3208  } else {
   3209    masm.atomicExchange(arrayType, sync, mem, value, output.gpr());
   3210  }
   3211 }
   3212 
   3213 void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType,
   3214                                      Synchronization sync, const Address& mem,
   3215                                      Register value, Register temp,
   3216                                      AnyRegister output) {
   3217  AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output);
   3218 }
   3219 
   3220 void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType,
   3221                                      Synchronization sync,
   3222                                      const BaseIndex& mem, Register value,
   3223                                      Register temp, AnyRegister output) {
   3224  AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output);
   3225 }
   3226 
   3227 template <typename T>
   3228 static void AtomicFetchOpJS(MacroAssembler& masm, Scalar::Type arrayType,
   3229                            Synchronization sync, AtomicOp op, Register value,
   3230                            const T& mem, Register temp1, Register temp2,
   3231                            AnyRegister output) {
   3232  if (arrayType == Scalar::Uint32) {
   3233    masm.atomicFetchOp(arrayType, sync, op, value, mem, temp2, temp1);
   3234    masm.convertUInt32ToDouble(temp1, output.fpu());
   3235  } else {
   3236    masm.atomicFetchOp(arrayType, sync, op, value, mem, temp1, output.gpr());
   3237  }
   3238 }
   3239 
   3240 void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType,
   3241                                     Synchronization sync, AtomicOp op,
   3242                                     Register value, const Address& mem,
   3243                                     Register temp1, Register temp2,
   3244                                     AnyRegister output) {
   3245  AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output);
   3246 }
   3247 
   3248 void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType,
   3249                                     Synchronization sync, AtomicOp op,
   3250                                     Register value, const BaseIndex& mem,
   3251                                     Register temp1, Register temp2,
   3252                                     AnyRegister output) {
   3253  AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output);
   3254 }
   3255 
   3256 void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType,
   3257                                      Synchronization sync, AtomicOp op,
   3258                                      Register value, const BaseIndex& mem,
   3259                                      Register temp) {
   3260  AtomicFetchOp<false>(*this, nullptr, arrayType, Width::_32, sync, op, mem,
   3261                       value, temp, temp);
   3262 }
   3263 
   3264 void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType,
   3265                                      Synchronization sync, AtomicOp op,
   3266                                      Register value, const Address& mem,
   3267                                      Register temp) {
   3268  AtomicFetchOp<false>(*this, nullptr, arrayType, Width::_32, sync, op, mem,
   3269                       value, temp, temp);
   3270 }
   3271 
   3272 void MacroAssembler::atomicPause() { Isb(); }
   3273 
   3274 void MacroAssembler::flexibleQuotient32(Register lhs, Register rhs,
   3275                                        Register dest, bool isUnsigned,
   3276                                        const LiveRegisterSet&) {
   3277  quotient32(lhs, rhs, dest, isUnsigned);
   3278 }
   3279 
   3280 void MacroAssembler::flexibleQuotientPtr(
   3281    Register lhs, Register rhs, Register dest, bool isUnsigned,
   3282    const LiveRegisterSet& volatileLiveRegs) {
   3283  quotient64(lhs, rhs, dest, isUnsigned);
   3284 }
   3285 
   3286 void MacroAssembler::flexibleRemainder32(Register lhs, Register rhs,
   3287                                         Register dest, bool isUnsigned,
   3288                                         const LiveRegisterSet&) {
   3289  remainder32(lhs, rhs, dest, isUnsigned);
   3290 }
   3291 
   3292 void MacroAssembler::flexibleRemainderPtr(
   3293    Register lhs, Register rhs, Register dest, bool isUnsigned,
   3294    const LiveRegisterSet& volatileLiveRegs) {
   3295  remainder64(lhs, rhs, dest, isUnsigned);
   3296 }
   3297 
   3298 void MacroAssembler::flexibleDivMod32(Register lhs, Register rhs,
   3299                                      Register divOutput, Register remOutput,
   3300                                      bool isUnsigned, const LiveRegisterSet&) {
   3301  MOZ_ASSERT(lhs != divOutput && lhs != remOutput, "lhs is preserved");
   3302  MOZ_ASSERT(rhs != divOutput && rhs != remOutput, "rhs is preserved");
   3303 
   3304  if (isUnsigned) {
   3305    Udiv(ARMRegister(divOutput, 32), ARMRegister(lhs, 32),
   3306         ARMRegister(rhs, 32));
   3307  } else {
   3308    Sdiv(ARMRegister(divOutput, 32), ARMRegister(lhs, 32),
   3309         ARMRegister(rhs, 32));
   3310  }
   3311 
   3312  // Compute the remainder: remOutput = lhs - (divOutput * rhs).
   3313  Msub(/* result= */ ARMRegister(remOutput, 32), ARMRegister(divOutput, 32),
   3314       ARMRegister(rhs, 32), ARMRegister(lhs, 32));
   3315 }
   3316 
   3317 CodeOffset MacroAssembler::moveNearAddressWithPatch(Register dest) {
   3318  AutoForbidPoolsAndNops afp(this,
   3319                             /* max number of instructions in scope = */ 1);
   3320  CodeOffset offset(currentOffset());
   3321  adr(ARMRegister(dest, 64), 0, LabelDoc());
   3322  return offset;
   3323 }
   3324 
// Patch an ADR previously emitted by moveNearAddressWithPatch so that it
// materializes |target|. The displacement must fit in ADR's signed 21-bit
// immediate (asserted below).
void MacroAssembler::patchNearAddressMove(CodeLocationLabel loc,
                                          CodeLocationLabel target) {
  ptrdiff_t off = target - loc;
  MOZ_RELEASE_ASSERT(vixl::IsInt21(off));

  Instruction* cur = reinterpret_cast<Instruction*>(loc.raw());
  MOZ_ASSERT(cur->IsADR());

  // Re-assemble the ADR in place, keeping its original destination register.
  vixl::Register rd = vixl::XRegister(cur->Rd());
  adr(cur, rd, off);
}
   3336 
   3337 // ========================================================================
   3338 // Spectre Mitigations.
   3339 
void MacroAssembler::speculationBarrier() {
  // Conditional speculation barrier (CSDB): part of the Spectre mitigations.
  csdb();
}
   3344 
// Floor a float32 in |src| into an Int32 in |dest|. Bails out to |fail| for
// NaN, for -0.0, and for results that do not fit in an Int32.
void MacroAssembler::floorFloat32ToInt32(FloatRegister src, Register dest,
                                         Label* fail) {
  ARMFPRegister iFlt(src, 32);
  ARMRegister o64(dest, 64);
  ARMRegister o32(dest, 32);

  Label handleZero;
  Label fin;

  // Handle ±0 and NaN first.
  Fcmp(iFlt, 0.0);
  B(Assembler::Equal, &handleZero);
  // NaN is always a bail condition, just bail directly.
  B(Assembler::Overflow, fail);

  // Round towards negative infinity.
  Fcvtms(o64, iFlt);

  // Sign extend lower 32 bits to test if the result isn't an Int32.
  Cmp(o64, Operand(o64, vixl::SXTW));
  B(NotEqual, fail);

  // Clear upper 32 bits.
  Uxtw(o64, o64);
  B(&fin);

  bind(&handleZero);
  // Move the float into the output reg, if it is non-zero, then the original
  // value was -0.0 (only ±0 reach here, and +0 has an all-zero bit pattern).
  Fmov(o32, iFlt);
  Cbnz(o32, fail);
  bind(&fin);
}
   3378 
// Floor a double in |src| into an Int32 in |dest|. Bails out to |fail| for
// NaN, for -0.0, and for results that do not fit in an Int32.
void MacroAssembler::floorDoubleToInt32(FloatRegister src, Register dest,
                                        Label* fail) {
  ARMFPRegister iDbl(src, 64);
  ARMRegister o64(dest, 64);

  Label handleZero;
  Label fin;

  // Handle ±0 and NaN first.
  Fcmp(iDbl, 0.0);
  B(Assembler::Equal, &handleZero);
  // NaN is always a bail condition, just bail directly.
  B(Assembler::Overflow, fail);

  // Round towards negative infinity.
  Fcvtms(o64, iDbl);

  // Sign extend lower 32 bits to test if the result isn't an Int32.
  Cmp(o64, Operand(o64, vixl::SXTW));
  B(NotEqual, fail);

  // Clear upper 32 bits.
  Uxtw(o64, o64);
  B(&fin);

  bind(&handleZero);
  // Move the double into the output reg, if it is non-zero, then the original
  // value was -0.0 (only ±0 reach here, and +0 has an all-zero bit pattern).
  Fmov(o64, iDbl);
  Cbnz(o64, fail);
  bind(&fin);
}
   3411 
// Ceil a float32 in |src| into an Int32 in |dest|. Bails out to |fail| for
// NaN, for inputs in (-1, -0] (whose ceiling would be -0), and for results
// that do not fit in an Int32.
void MacroAssembler::ceilFloat32ToInt32(FloatRegister src, Register dest,
                                        Label* fail) {
  ARMFPRegister iFlt(src, 32);
  ARMRegister o64(dest, 64);
  ARMRegister o32(dest, 32);

  Label handleZero;
  Label fin;

  // Round towards positive infinity.
  Fcvtps(o64, iFlt);

  // Sign extend lower 32 bits to test if the result isn't an Int32.
  Cmp(o64, Operand(o64, vixl::SXTW));
  B(NotEqual, fail);

  // We have to check for (-1, -0] and NaN when the result is zero.
  Cbz(o64, &handleZero);

  // Clear upper 32 bits.
  Uxtw(o64, o64);
  B(&fin);

  // Bail if the input is in (-1, -0] or NaN.
  bind(&handleZero);
  // Move the float into the output reg, if it is non-zero, then the original
  // value wasn't +0.0 (only a +0.0 input legitimately produces a zero result).
  Fmov(o32, iFlt);
  Cbnz(o32, fail);
  bind(&fin);
}
   3443 
// Ceil a double in |src| into an Int32 in |dest|. Bails out to |fail| for
// NaN, for inputs in (-1, -0] (whose ceiling would be -0), and for results
// that do not fit in an Int32.
void MacroAssembler::ceilDoubleToInt32(FloatRegister src, Register dest,
                                       Label* fail) {
  ARMFPRegister iDbl(src, 64);
  ARMRegister o64(dest, 64);

  Label handleZero;
  Label fin;

  // Round towards positive infinity.
  Fcvtps(o64, iDbl);

  // Sign extend lower 32 bits to test if the result isn't an Int32.
  Cmp(o64, Operand(o64, vixl::SXTW));
  B(NotEqual, fail);

  // We have to check for (-1, -0] and NaN when the result is zero.
  Cbz(o64, &handleZero);

  // Clear upper 32 bits.
  Uxtw(o64, o64);
  B(&fin);

  // Bail if the input is in (-1, -0] or NaN.
  bind(&handleZero);
  // Move the double into the output reg, if it is non-zero, then the original
  // value wasn't +0.0 (only a +0.0 input legitimately produces a zero result).
  Fmov(o64, iDbl);
  Cbnz(o64, fail);
  bind(&fin);
}
   3474 
// Truncate (round toward zero) a float32 in |src| into an Int32 in |dest|.
// Bails out to |fail| for NaN, for inputs in (-1, -0] (whose truncation would
// be -0), and for results that do not fit in an Int32.
void MacroAssembler::truncFloat32ToInt32(FloatRegister src, Register dest,
                                         Label* fail) {
  ARMFPRegister src32(src, 32);
  ARMRegister dest32(dest, 32);
  ARMRegister dest64(dest, 64);

  Label done, zeroCase;

  // Convert scalar to signed 64-bit fixed-point, rounding toward zero.
  // In the case of overflow, the output is saturated.
  // In the case of NaN and -0, the output is zero.
  Fcvtzs(dest64, src32);

  // If the output was zero, worry about special cases.
  Cbz(dest64, &zeroCase);

  // Sign extend lower 32 bits to test if the result isn't an Int32.
  Cmp(dest64, Operand(dest64, vixl::SXTW));
  B(NotEqual, fail);

  // Clear upper 32 bits.
  Uxtw(dest64, dest64);

  // If the output was non-zero and wasn't saturated, just return it.
  B(&done);

  // Handle the case of a zero output:
  // 1. The input may have been NaN, requiring a failure.
  // 2. The input may have been in (-1,-0], requiring a failure.
  {
    bind(&zeroCase);

    // Combine test for negative and NaN values using a single bitwise
    // operation.
    //
    // | Decimal number | Bitwise representation |
    // |----------------|------------------------|
    // | -0             | 8000'0000              |
    // | +0             | 0000'0000              |
    // | +1             | 3f80'0000              |
    // |  NaN (or +Inf) | 7fyx'xxxx, y >= 8      |
    // | -NaN (or -Inf) | ffyx'xxxx, y >= 8      |
    //
    // If any of two most significant bits is set, the number isn't in [0, 1).
    // (Recall that floating point numbers, except for NaN, are strictly ordered
    // when comparing their bitwise representation as signed integers.)

    Fmov(dest32, src32);
    Lsr(dest32, dest32, 30);
    Cbnz(dest32, fail);
  }

  bind(&done);
}
   3529 
// Truncate (round toward zero) a double in |src| into an Int32 in |dest|.
// Bails out to |fail| for NaN, for inputs in (-1, -0] (whose truncation would
// be -0), and for results that do not fit in an Int32.
void MacroAssembler::truncDoubleToInt32(FloatRegister src, Register dest,
                                        Label* fail) {
  ARMFPRegister src64(src, 64);
  ARMRegister dest64(dest, 64);
  ARMRegister dest32(dest, 32);

  Label done, zeroCase;

  // Convert scalar to signed 64-bit fixed-point, rounding toward zero.
  // In the case of overflow, the output is saturated.
  // In the case of NaN and -0, the output is zero.
  Fcvtzs(dest64, src64);

  // If the output was zero, worry about special cases.
  Cbz(dest64, &zeroCase);

  // Sign extend lower 32 bits to test if the result isn't an Int32.
  Cmp(dest64, Operand(dest64, vixl::SXTW));
  B(NotEqual, fail);

  // Clear upper 32 bits.
  Uxtw(dest64, dest64);

  // If the output was non-zero and wasn't saturated, just return it.
  B(&done);

  // Handle the case of a zero output:
  // 1. The input may have been NaN, requiring a failure.
  // 2. The input may have been in (-1,-0], requiring a failure.
  {
    bind(&zeroCase);

    // Combine test for negative and NaN values using a single bitwise
    // operation.
    //
    // | Decimal number | Bitwise representation |
    // |----------------|------------------------|
    // | -0             | 8000'0000'0000'0000    |
    // | +0             | 0000'0000'0000'0000    |
    // | +1             | 3ff0'0000'0000'0000    |
    // |  NaN (or +Inf) | 7ffx'xxxx'xxxx'xxxx    |
    // | -NaN (or -Inf) | fffx'xxxx'xxxx'xxxx    |
    //
    // If any of two most significant bits is set, the number isn't in [0, 1).
    // (Recall that floating point numbers, except for NaN, are strictly ordered
    // when comparing their bitwise representation as signed integers.)

    Fmov(dest64, src64);
    Lsr(dest64, dest64, 62);
    Cbnz(dest64, fail);
  }

  bind(&done);
}
   3584 
// Round a float32 in |src| to the nearest Int32 in |dest|, with halfway cases
// rounded toward +Infinity (JS Math.round semantics). |temp| is clobbered on
// the negative-input path. Bails out to |fail| for NaN, for inputs that would
// round to -0 (i.e. -0 itself and values in [-0.5, -0)), and for results that
// do not fit in an Int32.
void MacroAssembler::roundFloat32ToInt32(FloatRegister src, Register dest,
                                         FloatRegister temp, Label* fail) {
  ARMFPRegister src32(src, 32);
  ARMRegister dest32(dest, 32);
  ARMRegister dest64(dest, 64);

  Label negative, saturated, done;

  // Branch to a slow path if input < 0.0 due to complicated rounding rules.
  // Note that Fcmp with NaN unsets the negative flag.
  Fcmp(src32, 0.0);
  B(&negative, Assembler::Condition::lo);

  // Handle the simple case of a positive input, and also -0 and NaN.
  // Rounding proceeds with consideration of the fractional part of the input:
  // 1. If > 0.5, round to integer with higher absolute value (so, up).
  // 2. If < 0.5, round to integer with lower absolute value (so, down).
  // 3. If = 0.5, round to +Infinity (so, up).
  {
    // Convert to signed 64-bit integer, rounding halfway cases away from zero.
    // In the case of overflow, the output is saturated.
    // In the case of NaN and -0, the output is zero.
    Fcvtas(dest64, src32);

    // In the case of zero, the input may have been NaN or -0, which must bail.
    Cbnz(dest64, &saturated);

    // Combine test for -0 and NaN values using a single bitwise operation.
    // See truncFloat32ToInt32 for an explanation.
    Fmov(dest32, src32);
    Lsr(dest32, dest32, 30);
    Cbnz(dest32, fail);

    B(&done);
  }

  // Handle the complicated case of a negative input.
  // Rounding proceeds with consideration of the fractional part of the input:
  // 1. If > 0.5, round to integer with higher absolute value (so, down).
  // 2. If < 0.5, round to integer with lower absolute value (so, up).
  // 3. If = 0.5, round to +Infinity (so, up).
  bind(&negative);
  {
    // Inputs in [-0.5, 0) are rounded to -0. Fail.
    loadConstantFloat32(-0.5f, temp);
    branchFloat(Assembler::DoubleGreaterThanOrEqual, src, temp, fail);

    // Other negative inputs need the biggest double less than 0.5 added.
    loadConstantFloat32(GetBiggestNumberLessThan(0.5f), temp);
    addFloat32(src, temp);

    // Round all values toward -Infinity.
    // In the case of overflow, the output is saturated.
    // NaN and -0 are already handled by the "positive number" path above.
    Fcvtms(dest64, temp);
  }

  bind(&saturated);

  // Sign extend lower 32 bits to test if the result isn't an Int32.
  Cmp(dest64, Operand(dest64, vixl::SXTW));
  B(NotEqual, fail);

  // Clear upper 32 bits.
  Uxtw(dest64, dest64);

  bind(&done);
}
   3653 
// Round a double in |src| to the nearest Int32 in |dest|, with halfway cases
// rounded toward +Infinity (JS Math.round semantics). |temp| is clobbered on
// the negative-input path. Bails out to |fail| for NaN, for inputs that would
// round to -0 (i.e. -0 itself and values in [-0.5, -0)), and for results that
// do not fit in an Int32.
void MacroAssembler::roundDoubleToInt32(FloatRegister src, Register dest,
                                        FloatRegister temp, Label* fail) {
  ARMFPRegister src64(src, 64);
  ARMRegister dest64(dest, 64);
  ARMRegister dest32(dest, 32);

  Label negative, saturated, done;

  // Branch to a slow path if input < 0.0 due to complicated rounding rules.
  // Note that Fcmp with NaN unsets the negative flag.
  Fcmp(src64, 0.0);
  B(&negative, Assembler::Condition::lo);

  // Handle the simple case of a positive input, and also -0 and NaN.
  // Rounding proceeds with consideration of the fractional part of the input:
  // 1. If > 0.5, round to integer with higher absolute value (so, up).
  // 2. If < 0.5, round to integer with lower absolute value (so, down).
  // 3. If = 0.5, round to +Infinity (so, up).
  {
    // Convert to signed 64-bit integer, rounding halfway cases away from zero.
    // In the case of overflow, the output is saturated.
    // In the case of NaN and -0, the output is zero.
    Fcvtas(dest64, src64);

    // In the case of zero, the input may have been NaN or -0, which must bail.
    Cbnz(dest64, &saturated);

    // Combine test for -0 and NaN values using a single bitwise operation.
    // See truncDoubleToInt32 for an explanation.
    Fmov(dest64, src64);
    Lsr(dest64, dest64, 62);
    Cbnz(dest64, fail);

    B(&done);
  }

  // Handle the complicated case of a negative input.
  // Rounding proceeds with consideration of the fractional part of the input:
  // 1. If > 0.5, round to integer with higher absolute value (so, down).
  // 2. If < 0.5, round to integer with lower absolute value (so, up).
  // 3. If = 0.5, round to +Infinity (so, up).
  bind(&negative);
  {
    // Inputs in [-0.5, 0) are rounded to -0. Fail.
    loadConstantDouble(-0.5, temp);
    branchDouble(Assembler::DoubleGreaterThanOrEqual, src, temp, fail);

    // Other negative inputs need the biggest double less than 0.5 added.
    loadConstantDouble(GetBiggestNumberLessThan(0.5), temp);
    addDouble(src, temp);

    // Round all values toward -Infinity.
    // In the case of overflow, the output is saturated.
    // NaN and -0 are already handled by the "positive number" path above.
    Fcvtms(dest64, temp);
  }

  bind(&saturated);

  // Sign extend lower 32 bits to test if the result isn't an Int32.
  Cmp(dest64, Operand(dest64, vixl::SXTW));
  B(NotEqual, fail);

  // Clear upper 32 bits.
  Uxtw(dest64, dest64);

  bind(&done);
}
   3722 
   3723 void MacroAssembler::nearbyIntDouble(RoundingMode mode, FloatRegister src,
   3724                                     FloatRegister dest) {
   3725  switch (mode) {
   3726    case RoundingMode::Up:
   3727      frintp(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
   3728      return;
   3729    case RoundingMode::Down:
   3730      frintm(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
   3731      return;
   3732    case RoundingMode::NearestTiesToEven:
   3733      frintn(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
   3734      return;
   3735    case RoundingMode::TowardsZero:
   3736      frintz(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
   3737      return;
   3738  }
   3739  MOZ_CRASH("unexpected mode");
   3740 }
   3741 
   3742 void MacroAssembler::nearbyIntFloat32(RoundingMode mode, FloatRegister src,
   3743                                      FloatRegister dest) {
   3744  switch (mode) {
   3745    case RoundingMode::Up:
   3746      frintp(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
   3747      return;
   3748    case RoundingMode::Down:
   3749      frintm(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
   3750      return;
   3751    case RoundingMode::NearestTiesToEven:
   3752      frintn(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
   3753      return;
   3754    case RoundingMode::TowardsZero:
   3755      frintz(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
   3756      return;
   3757  }
   3758  MOZ_CRASH("unexpected mode");
   3759 }
   3760 
   3761 void MacroAssembler::copySignDouble(FloatRegister lhs, FloatRegister rhs,
   3762                                    FloatRegister output) {
   3763  ScratchDoubleScope scratch(*this);
   3764 
   3765  // Double with only the sign bit set
   3766  loadConstantDouble(-0.0, scratch);
   3767 
   3768  if (lhs != output) {
   3769    moveDouble(lhs, output);
   3770  }
   3771 
   3772  bit(ARMFPRegister(output.encoding(), vixl::VectorFormat::kFormat8B),
   3773      ARMFPRegister(rhs.encoding(), vixl::VectorFormat::kFormat8B),
   3774      ARMFPRegister(scratch.encoding(), vixl::VectorFormat::kFormat8B));
   3775 }
   3776 
   3777 void MacroAssembler::copySignFloat32(FloatRegister lhs, FloatRegister rhs,
   3778                                     FloatRegister output) {
   3779  ScratchFloat32Scope scratch(*this);
   3780 
   3781  // Float with only the sign bit set
   3782  loadConstantFloat32(-0.0f, scratch);
   3783 
   3784  if (lhs != output) {
   3785    moveFloat32(lhs, output);
   3786  }
   3787 
   3788  bit(ARMFPRegister(output.encoding(), vixl::VectorFormat::kFormat8B),
   3789      ARMFPRegister(rhs.encoding(), vixl::VectorFormat::kFormat8B),
   3790      ARMFPRegister(scratch.encoding(), vixl::VectorFormat::kFormat8B));
   3791 }
   3792 
   3793 void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, int shift,
   3794                                        Register pointer) {
   3795  Add(ARMRegister(pointer, 64), ARMRegister(pointer, 64),
   3796      Operand(ARMRegister(indexTemp32, 64), vixl::LSL, shift));
   3797 }
   3798 
// Emit the slow-call marker after a call: "mov x28, x28" assembles to
// ORR x28, xzr, x28, i.e. the instruction word 0xaa1c03fc (SlowCallMarker),
// a no-op that wasmCheckSlowCallsite() recognizes at the return address.
void MacroAssembler::wasmMarkCallAsSlow() {
  // Use mov() instead of Mov() to ensure this no-op move isn't elided.
  vixl::MacroAssembler::mov(x28, x28);
}
   3803 
// Encoding of "mov x28, x28" (ORR x28, xzr, x28), the no-op instruction
// emitted by wasmMarkCallAsSlow() to tag slow call sites.
const int32_t SlowCallMarker = 0xaa1c03fc;
   3805 
   3806 void MacroAssembler::wasmCheckSlowCallsite(Register ra, Label* notSlow,
   3807                                           Register temp1, Register temp2) {
   3808  MOZ_ASSERT(ra != temp2);
   3809  Ldr(W(temp2), MemOperand(X(ra), 0));
   3810  Cmp(W(temp2), Operand(SlowCallMarker));
   3811  B(Assembler::NotEqual, notSlow);
   3812 }
   3813 
   3814 CodeOffset MacroAssembler::wasmMarkedSlowCall(const wasm::CallSiteDesc& desc,
   3815                                              const Register reg) {
   3816  AutoForbidPoolsAndNops afp(this, !GetStackPointer64().Is(vixl::sp) ? 3 : 2);
   3817  CodeOffset offset = call(desc, reg);
   3818  wasmMarkCallAsSlow();
   3819  return offset;
   3820 }
   3821 
   3822 //}}} check_macroassembler_style
   3823 
   3824 }  // namespace jit
   3825 }  // namespace js