tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

CodeGenerator-x86-shared.cpp (121459B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "jit/x86-shared/CodeGenerator-x86-shared.h"
      8 
      9 #include "mozilla/DebugOnly.h"
     10 #include "mozilla/MathAlgorithms.h"
     11 
     12 #include "jit/CodeGenerator.h"
     13 #include "jit/InlineScriptTree.h"
     14 #include "jit/JitRuntime.h"
     15 #include "jit/RangeAnalysis.h"
     16 #include "jit/ReciprocalMulConstants.h"
     17 #include "js/ScalarType.h"  // js::Scalar::Type
     18 
     19 #include "jit/MacroAssembler-inl.h"
     20 #include "jit/shared/CodeGenerator-shared-inl.h"
     21 
     22 using namespace js;
     23 using namespace js::jit;
     24 
     25 using mozilla::Abs;
     26 using mozilla::DebugOnly;
     27 using mozilla::FloorLog2;
     28 using mozilla::NegativeInfinity;
     29 
     30 using JS::GenericNaN;
     31 
     32 namespace js {
     33 namespace jit {
     34 
// Code generator shared between x86 and x64; construction simply forwards
// to the platform-independent CodeGeneratorShared base.
CodeGeneratorX86Shared::CodeGeneratorX86Shared(
    MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm,
    const wasm::CodeMetadata* wasmCodeMeta)
    : CodeGeneratorShared(gen, graph, masm, wasmCodeMeta) {}
#ifdef JS_PUNBOX64
// On 64-bit targets an Int64 fits in a single allocation, so it can be
// handed to the assembler directly as an Operand (register or memory).
Operand CodeGeneratorX86Shared::ToOperandOrRegister64(
    const LInt64Allocation& input) {
  return ToOperand(input.value());
}
#else
// On 32-bit targets an Int64 occupies a register pair, which has no single
// Operand form; return the Register64 pair instead.
Register64 CodeGeneratorX86Shared::ToOperandOrRegister64(
    const LInt64Allocation& input) {
  return ToRegister64(input);
}
#endif
     51 
// Emit a conditional branch to the blocks `mirTrue`/`mirFalse`. When the
// false target is the next block in emission order a single conditional
// jump suffices; otherwise jump to the false target on the inverted
// condition and then unconditionally to the true target.
void CodeGeneratorX86Shared::emitBranch(Assembler::Condition cond,
                                        MBasicBlock* mirTrue,
                                        MBasicBlock* mirFalse) {
  if (isNextBlock(mirFalse->lir())) {
    jumpToBlock(mirTrue, cond);
  } else {
    jumpToBlock(mirFalse, Assembler::InvertCondition(cond));
    jumpToBlock(mirTrue);
  }
}
     62 
// Floating point variant of emitBranch. `ifNaN` selects which target is
// taken when the preceding compare was unordered (an operand was NaN): on
// x86 the parity flag (PF) is set exactly in that case, so a jump on
// Parity is emitted first unless NaN is already folded into `cond`.
void CodeGeneratorX86Shared::emitBranch(Assembler::DoubleCondition cond,
                                        MBasicBlock* ifTrue,
                                        MBasicBlock* ifFalse,
                                        Assembler::NaNCond ifNaN) {
  if (ifNaN == Assembler::NaN_IsFalse) {
    jumpToBlock(ifFalse, Assembler::Parity);
  } else if (ifNaN == Assembler::NaN_IsTrue) {
    jumpToBlock(ifTrue, Assembler::Parity);
  }
  emitBranch(Assembler::ConditionFromDoubleCondition(cond), ifTrue, ifFalse);
}
     74 
// Branch on the truthiness of a double: truthy iff non-zero and not NaN.
void CodeGenerator::visitTestDAndBranch(LTestDAndBranch* test) {
  const LAllocation* opd = test->input();

  // vucomisd flags:
  //             Z  P  C
  //            ---------
  //      NaN    1  1  1
  //        >    0  0  0
  //        <    0  0  1
  //        =    1  0  0
  //
  // NaN is falsey, so comparing against 0 and then using the Z flag is
  // enough to determine which branch to take.
  ScratchDoubleScope scratch(masm);
  masm.zeroDouble(scratch);
  masm.vucomisd(scratch, ToFloatRegister(opd));
  // NotEqual (ZF == 0) means the input is a non-NaN value != 0, i.e. truthy.
  emitBranch(Assembler::NotEqual, test->ifTrue(), test->ifFalse());
}
     93 
// Branch on the truthiness of a float32; same flag scheme as the double
// version above.
void CodeGenerator::visitTestFAndBranch(LTestFAndBranch* test) {
  const LAllocation* opd = test->input();
  // vucomiss flags are the same as doubles; see comment above
  {
    // Scope the scratch register tightly: only the EFLAGS result is needed
    // after the compare.
    ScratchFloat32Scope scratch(masm);
    masm.zeroFloat32(scratch);
    masm.vucomiss(scratch, ToFloatRegister(opd));
  }
  emitBranch(Assembler::NotEqual, test->ifTrue(), test->ifFalse());
}
    104 
    105 static Assembler::DoubleCondition ToDoubleCondition(FloatRegister lhs,
    106                                                    FloatRegister rhs,
    107                                                    JSOp op) {
    108  if (lhs == rhs) {
    109    switch (op) {
    110      case JSOp::Eq:
    111      case JSOp::StrictEq:
    112      case JSOp::Le:
    113      case JSOp::Ge:
    114        return Assembler::DoubleOrdered;
    115      case JSOp::Ne:
    116      case JSOp::StrictNe:
    117        return Assembler::DoubleUnordered;
    118      default:
    119        break;
    120    }
    121  }
    122  return JSOpToDoubleCondition(op);
    123 }
    124 
// Materialize a double comparison as a boolean in a GPR.
void CodeGenerator::visitCompareD(LCompareD* comp) {
  FloatRegister lhs = ToFloatRegister(comp->left());
  FloatRegister rhs = ToFloatRegister(comp->right());
  Register output = ToRegister(comp->output());

  Assembler::DoubleCondition cond =
      ToDoubleCondition(lhs, rhs, comp->mir()->jsop());

  // Decide how an unordered (NaN) compare feeds the boolean result; it can
  // be skipped entirely when MIR proved no NaN reaches here.
  Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond);
  if (comp->mir()->operandsAreNeverNaN()) {
    nanCond = Assembler::NaN_HandledByCond;
  }

  // Pre-zero the output before the compare, so the xor does not clobber
  // the flags that emitSet consumes.
  bool destIsZero = masm.maybeEmitSetZeroByteRegister(output);
  masm.compareDouble(cond, lhs, rhs);
  masm.emitSet(Assembler::ConditionFromDoubleCondition(cond), output,
               destIsZero, nanCond);
}
    143 
// Materialize a float32 comparison as a boolean in a GPR; mirrors
// visitCompareD above.
void CodeGenerator::visitCompareF(LCompareF* comp) {
  FloatRegister lhs = ToFloatRegister(comp->left());
  FloatRegister rhs = ToFloatRegister(comp->right());
  Register output = ToRegister(comp->output());

  Assembler::DoubleCondition cond =
      ToDoubleCondition(lhs, rhs, comp->mir()->jsop());

  // NaN handling can be folded away when MIR proved no NaN reaches here.
  Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond);
  if (comp->mir()->operandsAreNeverNaN()) {
    nanCond = Assembler::NaN_HandledByCond;
  }

  // Pre-zero the output before the compare, so the xor does not clobber
  // the flags that emitSet consumes.
  bool destIsZero = masm.maybeEmitSetZeroByteRegister(output);
  masm.compareFloat(cond, lhs, rhs);
  masm.emitSet(Assembler::ConditionFromDoubleCondition(cond), output,
               destIsZero, nanCond);
}
    162 
// Logical NOT of a double: true iff the input is 0, -0, or NaN.
void CodeGenerator::visitNotD(LNotD* ins) {
  FloatRegister opd = ToFloatRegister(ins->input());
  Register output = ToRegister(ins->output());

  // Not returns true if the input is a NaN. We don't have to worry about
  // it if we know the input is never NaN though.
  Assembler::NaNCond nanCond = Assembler::NaN_IsTrue;
  if (ins->mir()->operandIsNeverNaN()) {
    nanCond = Assembler::NaN_HandledByCond;
  }

  // Pre-zero the output before the compare, so the xor does not clobber
  // the flags that emitSet consumes.
  bool destIsZero = masm.maybeEmitSetZeroByteRegister(output);
  ScratchDoubleScope scratch(masm);
  masm.zeroDouble(scratch);
  // EqualOrUnordered against 0 captures both the x == 0 and x is NaN cases.
  masm.compareDouble(Assembler::DoubleEqualOrUnordered, opd, scratch);
  masm.emitSet(Assembler::Equal, output, destIsZero, nanCond);
}
    180 
// Logical NOT of a float32; mirrors visitNotD above.
void CodeGenerator::visitNotF(LNotF* ins) {
  FloatRegister opd = ToFloatRegister(ins->input());
  Register output = ToRegister(ins->output());

  // Not returns true if the input is a NaN. We don't have to worry about
  // it if we know the input is never NaN though.
  Assembler::NaNCond nanCond = Assembler::NaN_IsTrue;
  if (ins->mir()->operandIsNeverNaN()) {
    nanCond = Assembler::NaN_HandledByCond;
  }

  // Pre-zero the output before the compare, so the xor does not clobber
  // the flags that emitSet consumes.
  bool destIsZero = masm.maybeEmitSetZeroByteRegister(output);
  ScratchFloat32Scope scratch(masm);
  masm.zeroFloat32(scratch);
  // EqualOrUnordered against 0 captures both the x == 0 and x is NaN cases.
  masm.compareFloat(Assembler::DoubleEqualOrUnordered, opd, scratch);
  masm.emitSet(Assembler::Equal, output, destIsZero, nanCond);
}
    198 
// Compare two doubles and branch directly on the result.
void CodeGenerator::visitCompareDAndBranch(LCompareDAndBranch* comp) {
  FloatRegister lhs = ToFloatRegister(comp->left());
  FloatRegister rhs = ToFloatRegister(comp->right());

  Assembler::DoubleCondition cond =
      ToDoubleCondition(lhs, rhs, comp->cmpMir()->jsop());

  // Route unordered (NaN) results to the correct target unless MIR proved
  // NaN impossible.
  Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond);
  if (comp->cmpMir()->operandsAreNeverNaN()) {
    nanCond = Assembler::NaN_HandledByCond;
  }

  masm.compareDouble(cond, lhs, rhs);
  emitBranch(cond, comp->ifTrue(), comp->ifFalse(), nanCond);
}
    214 
// Compare two float32 values and branch directly on the result; mirrors
// visitCompareDAndBranch above.
void CodeGenerator::visitCompareFAndBranch(LCompareFAndBranch* comp) {
  FloatRegister lhs = ToFloatRegister(comp->left());
  FloatRegister rhs = ToFloatRegister(comp->right());

  Assembler::DoubleCondition cond =
      ToDoubleCondition(lhs, rhs, comp->cmpMir()->jsop());

  // Route unordered (NaN) results to the correct target unless MIR proved
  // NaN impossible.
  Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond);
  if (comp->cmpMir()->operandsAreNeverNaN()) {
    nanCond = Assembler::NaN_HandledByCond;
  }

  masm.compareFloat(cond, lhs, rhs);
  emitBranch(cond, comp->ifTrue(), comp->ifFalse(), nanCond);
}
    230 
// Store a wasm call argument into its outgoing stack slot. The value may be
// an immediate, a general-purpose register, or (per MIR type) a floating
// point / SIMD register.
void CodeGenerator::visitWasmStackArg(LWasmStackArg* ins) {
  const MWasmStackArg* mir = ins->mir();
  Address dst(StackPointer, mir->spOffset());
  if (ins->arg()->isConstant()) {
    masm.storePtr(ImmWord(ToInt32(ins->arg())), dst);
  } else if (ins->arg()->isGeneralReg()) {
    masm.storePtr(ToRegister(ins->arg()), dst);
  } else {
    switch (mir->input()->type()) {
      case MIRType::Double:
        masm.storeDouble(ToFloatRegister(ins->arg()), dst);
        return;
      case MIRType::Float32:
        masm.storeFloat32(ToFloatRegister(ins->arg()), dst);
        return;
#ifdef ENABLE_WASM_SIMD
      case MIRType::Simd128:
        masm.storeUnalignedSimd128(ToFloatRegister(ins->arg()), dst);
        return;
#endif
      default:
        break;
    }
    MOZ_CRASH("unexpected mir type in WasmStackArg");
  }
}
    257 
    258 void CodeGenerator::visitWasmStackArgI64(LWasmStackArgI64* ins) {
    259  const MWasmStackArg* mir = ins->mir();
    260  Address dst(StackPointer, mir->spOffset());
    261  if (IsConstant(ins->arg())) {
    262    masm.store64(Imm64(ToInt64(ins->arg())), dst);
    263  } else {
    264    masm.store64(ToRegister64(ins->arg()), dst);
    265  }
    266 }
    267 
// Lower a wasm select: out = cond ? trueExpr : falseExpr. The register
// allocator pins trueExpr to the output register, so only the false value
// may need materializing. Int32/anyref use a conditional move; float/SIMD
// types branch around a move/load since there is no FP cmov.
void CodeGenerator::visitWasmSelect(LWasmSelect* ins) {
  MIRType mirType = ins->mir()->type();

  Register cond = ToRegister(ins->condExpr());
  Operand falseExpr = ToOperand(ins->falseExpr());

  // Sets ZF when cond == 0; the cmov/jump below consume these flags, so no
  // flag-clobbering instruction may intervene.
  masm.test32(cond, cond);

  if (mirType == MIRType::Int32 || mirType == MIRType::WasmAnyRef) {
    Register out = ToRegister(ins->output());
    MOZ_ASSERT(ToRegister(ins->trueExpr()) == out,
               "true expr input is reused for output");
    if (mirType == MIRType::Int32) {
      masm.cmovz32(falseExpr, out);
    } else {
      masm.cmovzPtr(falseExpr, out);
    }
    return;
  }

  FloatRegister out = ToFloatRegister(ins->output());
  MOZ_ASSERT(ToFloatRegister(ins->trueExpr()) == out,
             "true expr input is reused for output");

  // cond != 0: the output already holds trueExpr, so skip the false path.
  Label done;
  masm.j(Assembler::NonZero, &done);

  if (mirType == MIRType::Float32) {
    if (falseExpr.kind() == Operand::FPREG) {
      masm.moveFloat32(ToFloatRegister(ins->falseExpr()), out);
    } else {
      masm.loadFloat32(falseExpr, out);
    }
  } else if (mirType == MIRType::Double) {
    if (falseExpr.kind() == Operand::FPREG) {
      masm.moveDouble(ToFloatRegister(ins->falseExpr()), out);
    } else {
      masm.loadDouble(falseExpr, out);
    }
  } else if (mirType == MIRType::Simd128) {
    if (falseExpr.kind() == Operand::FPREG) {
      masm.moveSimd128(ToFloatRegister(ins->falseExpr()), out);
    } else {
      masm.loadUnalignedSimd128(falseExpr, out);
    }
  } else {
    MOZ_CRASH("unhandled type in visitWasmSelect!");
  }

  masm.bind(&done);
}
    319 
// asm.js heap load. Unlike wasm, an out-of-bounds asm.js load does not
// trap: it yields NaN for float loads and 0 for integer loads, so the
// bounds check (when required) branches to an out-of-line path that
// produces that default value and rejoins after the load.
void CodeGenerator::visitAsmJSLoadHeap(LAsmJSLoadHeap* ins) {
  const MAsmJSLoadHeap* mir = ins->mir();
  MOZ_ASSERT(mir->access().offset64() == 0);

  const LAllocation* ptr = ins->ptr();
  const LAllocation* boundsCheckLimit = ins->boundsCheckLimit();
  AnyRegister out = ToAnyRegister(ins->output());

  Scalar::Type accessType = mir->accessType();

  OutOfLineCode* ool = nullptr;
  if (mir->needsBoundsCheck()) {
    ool = new (alloc()) LambdaOutOfLineCode([=, this](OutOfLineCode& ool) {
      switch (accessType) {
        case Scalar::Int64:
        case Scalar::BigInt64:
        case Scalar::BigUint64:
        case Scalar::Simd128:
        case Scalar::Float16:
        case Scalar::MaxTypedArrayViewType:
          MOZ_CRASH("unexpected array type");
        case Scalar::Float32:
          // Out-of-bounds float loads produce NaN.
          masm.loadConstantFloat32(float(GenericNaN()), out.fpu());
          break;
        case Scalar::Float64:
          masm.loadConstantDouble(GenericNaN(), out.fpu());
          break;
        case Scalar::Int8:
        case Scalar::Uint8:
        case Scalar::Int16:
        case Scalar::Uint16:
        case Scalar::Int32:
        case Scalar::Uint32:
        case Scalar::Uint8Clamped:
          // Out-of-bounds integer loads produce 0.
          Register destReg = out.gpr();
          masm.mov(ImmWord(0), destReg);
          break;
      }
      masm.jmp(ool.rejoin());
    });
    addOutOfLineCode(ool, mir);

    // ptr >= limit: take the OOL default-value path.
    masm.wasmBoundsCheck32(Assembler::AboveOrEqual, ToRegister(ptr),
                           ToRegister(boundsCheckLimit), ool->entry());
  }

  Operand srcAddr = toMemoryAccessOperand(ins, 0);
  masm.wasmLoad(mir->access(), srcAddr, out);

  if (ool) {
    masm.bind(ool->rejoin());
  }
}
    373 
// asm.js heap store. An out-of-bounds asm.js store is simply ignored, so
// the bounds check (when required) just jumps past the store.
void CodeGenerator::visitAsmJSStoreHeap(LAsmJSStoreHeap* ins) {
  const MAsmJSStoreHeap* mir = ins->mir();

  const LAllocation* ptr = ins->ptr();
  const LAllocation* value = ins->value();
  const LAllocation* boundsCheckLimit = ins->boundsCheckLimit();

  Label rejoin;
  if (mir->needsBoundsCheck()) {
    masm.wasmBoundsCheck32(Assembler::AboveOrEqual, ToRegister(ptr),
                           ToRegister(boundsCheckLimit), &rejoin);
  }

  Operand dstAddr = toMemoryAccessOperand(ins, 0);
  masm.wasmStore(mir->access(), ToAnyRegister(value), dstAddr);

  // `rejoin` is only used when the bounds check was emitted.
  if (rejoin.used()) {
    masm.bind(&rejoin);
  }
}
    394 
// Add a constant offset to a 32-bit wasm pointer, trapping rather than
// wrapping: add32 sets the carry flag on unsigned overflow, which routes
// to an out-of-line wasm OutOfBounds trap.
void CodeGenerator::visitWasmAddOffset(LWasmAddOffset* lir) {
  MWasmAddOffset* mir = lir->mir();
  Register base = ToRegister(lir->base());
  Register out = ToRegister(lir->output());

  if (base != out) {
    masm.move32(base, out);
  }
  masm.add32(Imm32(mir->offset()), out);
  auto* ool = new (alloc()) LambdaOutOfLineCode([=, this](OutOfLineCode& ool) {
    masm.wasmTrap(wasm::Trap::OutOfBounds, mir->trapSiteDesc());
  });
  addOutOfLineCode(ool, mir);
  masm.j(Assembler::CarrySet, ool->entry());
}
    410 
// 64-bit variant of visitWasmAddOffset: carry out of add64 signals
// unsigned overflow and triggers the OutOfBounds trap.
void CodeGenerator::visitWasmAddOffset64(LWasmAddOffset64* lir) {
  MWasmAddOffset* mir = lir->mir();
  Register64 base = ToRegister64(lir->base());
  Register64 out = ToOutRegister64(lir);

  if (base != out) {
    masm.move64(base, out);
  }
  masm.add64(Imm64(mir->offset()), out);
  auto* ool = new (alloc()) LambdaOutOfLineCode([=, this](OutOfLineCode& ool) {
    masm.wasmTrap(wasm::Trap::OutOfBounds, mir->trapSiteDesc());
  });
  addOutOfLineCode(ool, mir);
  masm.j(Assembler::CarrySet, ool->entry());
}
    426 
// Truncate a float/double to int32 for wasm. Unrepresentable inputs branch
// to OutOfLineWasmTruncateCheck, which either traps (non-saturating) or
// computes the saturated result and rejoins.
void CodeGenerator::visitWasmTruncateToInt32(LWasmTruncateToInt32* lir) {
  FloatRegister input = ToFloatRegister(lir->input());
  Register output = ToRegister(lir->output());

  MWasmTruncateToInt32* mir = lir->mir();
  MIRType inputType = mir->input()->type();

  MOZ_ASSERT(inputType == MIRType::Double || inputType == MIRType::Float32);

  auto* ool = new (alloc()) OutOfLineWasmTruncateCheck(mir, input, output);
  addOutOfLineCode(ool, mir);

  Label* oolEntry = ool->entry();
  if (mir->isUnsigned()) {
    if (inputType == MIRType::Double) {
      masm.wasmTruncateDoubleToUInt32(input, output, mir->isSaturating(),
                                      oolEntry);
    } else if (inputType == MIRType::Float32) {
      masm.wasmTruncateFloat32ToUInt32(input, output, mir->isSaturating(),
                                       oolEntry);
    } else {
      MOZ_CRASH("unexpected type");
    }
    // Only the saturating OOL path jumps back; the trapping path never
    // returns, so no rejoin is bound in that case.
    if (mir->isSaturating()) {
      masm.bind(ool->rejoin());
    }
    return;
  }

  if (inputType == MIRType::Double) {
    masm.wasmTruncateDoubleToInt32(input, output, mir->isSaturating(),
                                   oolEntry);
  } else if (inputType == MIRType::Float32) {
    masm.wasmTruncateFloat32ToInt32(input, output, mir->isSaturating(),
                                    oolEntry);
  } else {
    MOZ_CRASH("unexpected type");
  }

  masm.bind(ool->rejoin());
}
    468 
// Emit all out-of-line code, then the shared bailout tail: every
// non-table-based bailout jumps to deoptLabel_, which records the frame
// size and tail-jumps to the generic bailout trampoline.
bool CodeGeneratorX86Shared::generateOutOfLineCode() {
  if (!CodeGeneratorShared::generateOutOfLineCode()) {
    return false;
  }

  if (deoptLabel_.used()) {
    // All non-table-based bailouts will go here.
    masm.bind(&deoptLabel_);

    // Push the frame size, so the handler can recover the IonScript.
    masm.push(Imm32(frameSize()));

    TrampolinePtr handler = gen->jitRuntime()->getGenericBailoutHandler();
    masm.jump(handler);
  }

  return !masm.oom();
}
    487 
// Binder functor for CodeGeneratorX86Shared::bailout: emits a conditional
// jump either to a hard-coded code address (x86 only) or to a label.
class BailoutJump {
  Assembler::Condition cond_;

 public:
  explicit BailoutJump(Assembler::Condition cond) : cond_(cond) {}
#ifdef JS_CODEGEN_X86
  void operator()(MacroAssembler& masm, uint8_t* code) const {
    masm.j(cond_, ImmPtr(code), RelocationKind::HARDCODED);
  }
#endif
  void operator()(MacroAssembler& masm, Label* label) const {
    masm.j(cond_, label);
  }
};
    502 
// Binder functor for CodeGeneratorX86Shared::bailout: retargets an
// existing (used but unbound) label either to a hard-coded code address
// (x86 only) or to another label.
class BailoutLabel {
  Label* label_;

 public:
  explicit BailoutLabel(Label* label) : label_(label) {}
#ifdef JS_CODEGEN_X86
  void operator()(MacroAssembler& masm, uint8_t* code) const {
    masm.retarget(label_, ImmPtr(code), RelocationKind::HARDCODED);
  }
#endif
  void operator()(MacroAssembler& masm, Label* label) const {
    masm.retarget(label_, label);
  }
};
    517 
// Shared bailout emission: encode the snapshot, create an out-of-line stub
// that pushes the snapshot offset and jumps to the common deopt tail
// (deoptLabel_), then let `binder` wire the in-line jump to that stub.
template <typename T>
void CodeGeneratorX86Shared::bailout(const T& binder, LSnapshot* snapshot) {
  encode(snapshot);

  // All bailout code is associated with the bytecodeSite of the block we are
  // bailing out from.
  InlineScriptTree* tree = snapshot->mir()->block()->trackedTree();
  auto* ool = new (alloc()) LambdaOutOfLineCode([=, this](OutOfLineCode& ool) {
    masm.push(Imm32(snapshot->snapshotOffset()));
    masm.jmp(&deoptLabel_);
  });
  addOutOfLineCode(ool,
                   new (alloc()) BytecodeSite(tree, tree->script()->code()));

  binder(masm, ool->entry());
}
    534 
// Bail out of Ion code when `condition` holds.
void CodeGeneratorX86Shared::bailoutIf(Assembler::Condition condition,
                                       LSnapshot* snapshot) {
  bailout(BailoutJump(condition), snapshot);
}
    539 
// Bail out on a floating point condition. Only conditions whose NaN
// behavior is already folded into the condition itself are supported.
void CodeGeneratorX86Shared::bailoutIf(Assembler::DoubleCondition condition,
                                       LSnapshot* snapshot) {
  MOZ_ASSERT(Assembler::NaNCondFromDoubleCondition(condition) ==
             Assembler::NaN_HandledByCond);
  bailoutIf(Assembler::ConditionFromDoubleCondition(condition), snapshot);
}
    546 
// Bail out via a label that earlier code has jumped to but not yet bound.
void CodeGeneratorX86Shared::bailoutFrom(Label* label, LSnapshot* snapshot) {
  MOZ_ASSERT_IF(!masm.oom(), label->used() && !label->bound());
  bailout(BailoutLabel(label), snapshot);
}
    551 
// Unconditional bailout: emit a jump to a fresh label and immediately
// retarget it at the bailout stub.
void CodeGeneratorX86Shared::bailout(LSnapshot* snapshot) {
  Label label;
  masm.jump(&label);
  bailoutFrom(&label, snapshot);
}
    557 
    558 void CodeGenerator::visitMinMaxD(LMinMaxD* ins) {
    559  FloatRegister first = ToFloatRegister(ins->first());
    560  FloatRegister second = ToFloatRegister(ins->second());
    561 #ifdef DEBUG
    562  FloatRegister output = ToFloatRegister(ins->output());
    563  MOZ_ASSERT(first == output);
    564 #endif
    565 
    566  bool handleNaN = !ins->mir()->range() || ins->mir()->range()->canBeNaN();
    567 
    568  if (ins->mir()->isMax()) {
    569    masm.maxDouble(second, first, handleNaN);
    570  } else {
    571    masm.minDouble(second, first, handleNaN);
    572  }
    573 }
    574 
    575 void CodeGenerator::visitMinMaxF(LMinMaxF* ins) {
    576  FloatRegister first = ToFloatRegister(ins->first());
    577  FloatRegister second = ToFloatRegister(ins->second());
    578 #ifdef DEBUG
    579  FloatRegister output = ToFloatRegister(ins->output());
    580  MOZ_ASSERT(first == output);
    581 #endif
    582 
    583  bool handleNaN = !ins->mir()->range() || ins->mir()->range()->canBeNaN();
    584 
    585  if (ins->mir()->isMax()) {
    586    masm.maxFloat32(second, first, handleNaN);
    587  } else {
    588    masm.minFloat32(second, first, handleNaN);
    589  }
    590 }
    591 
// Math.pow(x, 0.5): sqrt with the two ES-mandated special cases that plain
// vsqrtsd gets wrong: pow(-Infinity, 0.5) == Infinity (sqrt would give
// NaN) and pow(-0, 0.5) == +0 (sqrt would give -0).
void CodeGenerator::visitPowHalfD(LPowHalfD* ins) {
  FloatRegister input = ToFloatRegister(ins->input());
  FloatRegister output = ToFloatRegister(ins->output());

  ScratchDoubleScope scratch(masm);

  Label done, sqrt;

  if (!ins->mir()->operandIsNeverNegativeInfinity()) {
    // Branch if not -Infinity.
    masm.loadConstantDouble(NegativeInfinity<double>(), scratch);

    // NaN compares unordered to -Infinity, so it must take the sqrt path;
    // the weaker DoubleNotEqual suffices when NaN cannot occur.
    Assembler::DoubleCondition cond = Assembler::DoubleNotEqualOrUnordered;
    if (ins->mir()->operandIsNeverNaN()) {
      cond = Assembler::DoubleNotEqual;
    }
    masm.branchDouble(cond, input, scratch, &sqrt);

    // Math.pow(-Infinity, 0.5) == Infinity.
    // Compute +Infinity as 0 - (-Infinity), avoiding another constant load.
    masm.zeroDouble(output);
    masm.subDouble(scratch, output);
    masm.jump(&done);

    masm.bind(&sqrt);
  }

  if (!ins->mir()->operandIsNeverNegativeZero()) {
    // Math.pow(-0, 0.5) == 0 == Math.pow(0, 0.5).
    // Adding 0 converts any -0 to 0.
    masm.zeroDouble(scratch);
    masm.addDouble(input, scratch);
    masm.vsqrtsd(scratch, output, output);
  } else {
    masm.vsqrtsd(input, output, output);
  }

  masm.bind(&done);
}
    630 
// Out-of-line path for LAddI/LSubI when the operation overflowed and an
// input must be recovered: reverse the ALU operation on the (clobbered)
// output register, then bail out.
void CodeGeneratorX86Shared::emitUndoALUOperationOOL(LInstruction* ins) {
  Register reg = ToRegister(ins->getDef(0));

  DebugOnly<LAllocation*> lhs = ins->getOperand(0);
  LAllocation* rhs = ins->getOperand(1);

  MOZ_ASSERT(reg == ToRegister(lhs));
  MOZ_ASSERT_IF(rhs->isGeneralReg(), reg != ToRegister(rhs));

  // Undo the effect of the ALU operation, which was performed on the output
  // register and overflowed. Writing to the output register clobbered an
  // input reg, and the original value of the input needs to be recovered
  // to satisfy the constraint imposed by any RECOVERED_INPUT operands to
  // the bailout snapshot.

  if (rhs->isConstant()) {
    Imm32 constant(ToInt32(rhs));
    if (ins->isAddI()) {
      masm.subl(constant, reg);
    } else {
      masm.addl(constant, reg);
    }
  } else {
    if (ins->isAddI()) {
      masm.subl(ToOperand(rhs), reg);
    } else {
      masm.addl(ToOperand(rhs), reg);
    }
  }

  bailout(ins->snapshot());
}
    663 
// 32-bit integer add with optional overflow bailout.
void CodeGenerator::visitAddI(LAddI* ins) {
  Register lhs = ToRegister(ins->lhs());
  const LAllocation* rhs = ins->rhs();
  Register out = ToRegister(ins->output());

  if (rhs->isConstant()) {
    if (lhs != out) {
      // LEA does not set flags, so this form is only valid when no
      // overflow bailout is attached.
      MOZ_ASSERT(!ins->snapshot());
      // Special case to lower the add to LEA instruction.
      masm.add32(Imm32(ToInt32(rhs)), lhs, out);
    } else {
      masm.addl(Imm32(ToInt32(rhs)), lhs);
    }
  } else {
    MOZ_ASSERT(out == lhs);
    masm.addl(ToOperand(rhs), lhs);
  }

  if (ins->snapshot()) {
    if (ins->recoversInput()) {
      // The add clobbered an input; undo it out-of-line before bailing so
      // the snapshot observes the original value.
      auto* ool = new (alloc()) LambdaOutOfLineCode(
          [=, this](OutOfLineCode& ool) { emitUndoALUOperationOOL(ins); });
      addOutOfLineCode(ool, ins->mir());
      masm.j(Assembler::Overflow, ool->entry());
    } else {
      bailoutIf(Assembler::Overflow, ins->snapshot());
    }
  }
}
    693 
    694 void CodeGenerator::visitAddI64(LAddI64* lir) {
    695  Register64 lhs = ToRegister64(lir->lhs());
    696  LInt64Allocation rhs = lir->rhs();
    697 
    698  MOZ_ASSERT(ToOutRegister64(lir) == lhs);
    699 
    700  if (IsConstant(rhs)) {
    701    masm.add64(Imm64(ToInt64(rhs)), lhs);
    702    return;
    703  }
    704 
    705  masm.add64(ToOperandOrRegister64(rhs), lhs);
    706 }
    707 
// 32-bit integer subtract with optional overflow bailout.
void CodeGenerator::visitSubI(LSubI* ins) {
  Register lhs = ToRegister(ins->lhs());
  const LAllocation* rhs = ins->rhs();

  MOZ_ASSERT(ToRegister(ins->output()) == lhs);

  if (rhs->isConstant()) {
    masm.subl(Imm32(ToInt32(rhs)), lhs);
  } else {
    masm.subl(ToOperand(rhs), lhs);
  }

  if (ins->snapshot()) {
    if (ins->recoversInput()) {
      // The subtract clobbered an input; undo it out-of-line before
      // bailing so the snapshot observes the original value.
      auto* ool = new (alloc()) LambdaOutOfLineCode(
          [=, this](OutOfLineCode& ool) { emitUndoALUOperationOOL(ins); });
      addOutOfLineCode(ool, ins->mir());
      masm.j(Assembler::Overflow, ool->entry());
    } else {
      bailoutIf(Assembler::Overflow, ins->snapshot());
    }
  }
}
    731 
    732 void CodeGenerator::visitSubI64(LSubI64* lir) {
    733  LInt64Allocation lhs = lir->lhs();
    734  LInt64Allocation rhs = lir->rhs();
    735 
    736  MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
    737 
    738  if (IsConstant(rhs)) {
    739    masm.sub64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
    740    return;
    741  }
    742 
    743  masm.sub64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
    744 }
    745 
// Lower a 32-bit integer multiply. Constant right-hand sides are
// strength-reduced to LEA/shift/add forms when no overflow check is
// needed; otherwise imul is used. Bailouts cover overflow and a -0 result
// when MIR says those can occur.
void CodeGenerator::visitMulI(LMulI* ins) {
  Register lhs = ToRegister(ins->lhs());
  const LAllocation* rhs = ins->rhs();
  Register out = ToRegister(ins->output());

  MMul* mul = ins->mir();
  // Integer-mode multiplies must already be proven free of -0 and overflow.
  MOZ_ASSERT_IF(mul->mode() == MMul::Integer,
                !mul->canBeNegativeZero() && !mul->canOverflow());

  if (rhs->isConstant()) {
    // Bailout on -0.0
    int32_t constant = ToInt32(rhs);
    if (mul->canBeNegativeZero() && constant <= 0) {
      // constant == 0: result is -0 iff lhs is negative (Signed after test).
      // constant < 0:  result is -0 iff lhs is zero (Equal after test).
      Assembler::Condition bailoutCond =
          (constant == 0) ? Assembler::Signed : Assembler::Equal;
      masm.test32(lhs, lhs);
      bailoutIf(bailoutCond, ins->snapshot());
    }

    if (!mul->canOverflow()) {
      // Strength-reduced forms; these don't set the overflow flag, so they
      // are only usable when no overflow check is required.
      switch (constant) {
        case 2:
          if (lhs == out) {
            masm.addl(lhs, lhs);
          } else {
            masm.leal(Operand(lhs, lhs, TimesOne), out);
          }
          return;
        case 3:
          masm.leal(Operand(lhs, lhs, TimesTwo), out);
          return;
        case 4:
          if (lhs == out) {
            masm.shll(Imm32(2), lhs);
          } else {
            masm.leal(Operand(lhs, TimesFour, 0), out);
          }
          return;
        case 5:
          masm.leal(Operand(lhs, lhs, TimesFour), out);
          return;
        case 8:
          if (lhs == out) {
            masm.shll(Imm32(3), lhs);
          } else {
            masm.leal(Operand(lhs, TimesEight, 0), out);
          }
          return;
        case 9:
          masm.leal(Operand(lhs, lhs, TimesEight), out);
          return;
        default:
          // Use shift if cannot overflow and constant is power of 2
          int32_t shift = FloorLog2(constant);
          if (constant > 0 && (1 << shift) == constant) {
            if (lhs != out) {
              masm.movl(lhs, out);
            }
            masm.shll(Imm32(shift), out);
            return;
          }
      }
    }

    switch (constant) {
      case -1:
        if (lhs != out) {
          masm.movl(lhs, out);
        }
        // negl sets the overflow flag for INT32_MIN; checked below.
        masm.negl(out);
        break;
      case 0:
        masm.xorl(out, out);
        return;  // escape overflow check;
      case 1:
        if (lhs != out) {
          masm.movl(lhs, out);
        }
        return;  // escape overflow check;
      case 2:
        if (lhs == out) {
          masm.addl(lhs, lhs);
          break;
        }
        [[fallthrough]];
      default:
        masm.imull(Imm32(constant), lhs, out);
    }

    // Bailout on overflow
    if (mul->canOverflow()) {
      bailoutIf(Assembler::Overflow, ins->snapshot());
    }
  } else {
    MOZ_ASSERT(out == lhs);

    masm.imull(ToOperand(rhs), lhs);

    // Bailout on overflow
    if (mul->canOverflow()) {
      bailoutIf(Assembler::Overflow, ins->snapshot());
    }

    if (mul->canBeNegativeZero()) {
      // Jump to an OOL path if the result is 0.
      auto* ool =
          new (alloc()) LambdaOutOfLineCode([=, this](OutOfLineCode& ool) {
            Register result = ToRegister(ins->output());
            Operand lhsCopy = ToOperand(ins->lhsCopy());
            Operand rhs = ToOperand(ins->rhs());
            MOZ_ASSERT_IF(lhsCopy.kind() == Operand::REG,
                          lhsCopy.reg() != result.code());

            // Result is -0 if lhs or rhs is negative.
            masm.movl(lhsCopy, result);
            masm.orl(rhs, result);
            bailoutIf(Assembler::Signed, ins->snapshot());

            // 0 * non-negative == +0: restore the zero result and rejoin.
            masm.mov(ImmWord(0), result);
            masm.jmp(ool.rejoin());
          });
      addOutOfLineCode(ool, mul);

      masm.test32(lhs, lhs);
      masm.j(Assembler::Zero, ool->entry());
      masm.bind(ool->rejoin());
    }
  }
}
    875 
    876 template <class LIR>
    877 static void TrapIfDivideByZero(MacroAssembler& masm, LIR* lir, Register rhs) {
    878  auto* mir = lir->mir();
    879  MOZ_ASSERT(mir->trapOnError());
    880  MOZ_ASSERT(mir->canBeDivideByZero());
    881 
    882  Label nonZero;
    883  masm.branchTest32(Assembler::NonZero, rhs, rhs, &nonZero);
    884  masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->trapSiteDesc());
    885  masm.bind(&nonZero);
    886 }
    887 
    888 OutOfLineCode* CodeGeneratorX86Shared::emitOutOfLineZeroForDivideByZero(
    889    Register rhs, Register output) {
    890  // Truncated division by zero is zero: (±Infinity|0 == 0) and (NaN|0 == 0).
    891  auto* ool = new (alloc()) LambdaOutOfLineCode([=, this](OutOfLineCode& ool) {
    892    masm.mov(ImmWord(0), output);
    893    masm.jmp(ool.rejoin());
    894  });
    895  masm.branchTest32(Assembler::Zero, rhs, rhs, ool->entry());
    896 
    897  return ool;
    898 }
    899 
    900 void CodeGenerator::visitUDiv(LUDiv* ins) {
    901  Register rhs = ToRegister(ins->rhs());
    902  Register output = ToRegister(ins->output());
    903  Register remainder = ToRegister(ins->temp0());
    904 
    905  MOZ_ASSERT(ToRegister(ins->lhs()) == eax);
    906  MOZ_ASSERT(rhs != eax);
    907  MOZ_ASSERT(rhs != edx);
    908  MOZ_ASSERT(output == eax);
    909  MOZ_ASSERT(remainder == edx);
    910 
    911  MDiv* mir = ins->mir();
    912 
    913  OutOfLineCode* ool = nullptr;
    914 
    915  // Prevent divide by zero.
    916  if (mir->canBeDivideByZero()) {
    917    if (mir->trapOnError()) {
    918      TrapIfDivideByZero(masm, ins, rhs);
    919    } else if (mir->isTruncated()) {
    920      ool = emitOutOfLineZeroForDivideByZero(rhs, output);
    921    } else {
    922      MOZ_ASSERT(mir->fallible());
    923      bailoutTest32(Assembler::Zero, rhs, rhs, ins->snapshot());
    924    }
    925  }
    926 
    927  // Zero extend the lhs into edx to make (edx:eax), since udiv is 64-bit.
    928  masm.mov(ImmWord(0), edx);
    929  masm.udiv(rhs);
    930 
    931  // If the remainder is > 0, bailout since this must be a double.
    932  if (!mir->canTruncateRemainder()) {
    933    bailoutTest32(Assembler::NonZero, remainder, remainder, ins->snapshot());
    934  }
    935 
    936  // Unsigned div can return a value that's not a signed int32.
    937  // If our users aren't expecting that, bail.
    938  if (!mir->isTruncated()) {
    939    bailoutTest32(Assembler::Signed, output, output, ins->snapshot());
    940  }
    941 
    942  if (ool) {
    943    addOutOfLineCode(ool, mir);
    944    masm.bind(ool->rejoin());
    945  }
    946 }
    947 
    948 void CodeGenerator::visitUMod(LUMod* ins) {
    949  Register rhs = ToRegister(ins->rhs());
    950  Register output = ToRegister(ins->output());
    951 
    952  MOZ_ASSERT(ToRegister(ins->lhs()) == eax);
    953  MOZ_ASSERT(rhs != eax);
    954  MOZ_ASSERT(rhs != edx);
    955  MOZ_ASSERT(output == edx);
    956  MOZ_ASSERT(ToRegister(ins->temp0()) == eax);
    957 
    958  MMod* mir = ins->mir();
    959 
    960  OutOfLineCode* ool = nullptr;
    961 
    962  // Prevent divide by zero.
    963  if (mir->canBeDivideByZero()) {
    964    if (mir->trapOnError()) {
    965      TrapIfDivideByZero(masm, ins, rhs);
    966    } else if (mir->isTruncated()) {
    967      ool = emitOutOfLineZeroForDivideByZero(rhs, output);
    968    } else {
    969      MOZ_ASSERT(mir->fallible());
    970      bailoutTest32(Assembler::Zero, rhs, rhs, ins->snapshot());
    971    }
    972  }
    973 
    974  // Zero extend the lhs into edx to make (edx:eax), since udiv is 64-bit.
    975  masm.mov(ImmWord(0), edx);
    976  masm.udiv(rhs);
    977 
    978  // Unsigned mod can return a value that's not a signed int32.
    979  // If our users aren't expecting that, bail.
    980  if (!mir->isTruncated()) {
    981    bailoutTest32(Assembler::Signed, output, output, ins->snapshot());
    982  }
    983 
    984  if (ool) {
    985    addOutOfLineCode(ool, mir);
    986    masm.bind(ool->rejoin());
    987  }
    988 }
    989 
// Emit the truncated unsigned division of |ins->numerator()| by the
// non-zero, non-power-of-two constant |ins->denominator()| using a
// reciprocal multiplication (see ReciprocalMulConstants and Hacker's
// Delight, chapter 10). The quotient is left in |result|; |temp| may be
// clobbered. Shared by LUDivConstant and LUModConstant.
template <class LUDivOrUMod>
static void UnsignedDivideWithConstant(MacroAssembler& masm, LUDivOrUMod* ins,
                                      Register result, Register temp) {
 Register lhs = ToRegister(ins->numerator());
 uint32_t d = ins->denominator();

 MOZ_ASSERT(lhs != result && lhs != temp);
#ifdef JS_CODEGEN_X86
 // On x86-32 the one-operand umull below implicitly uses edx:eax.
 MOZ_ASSERT(result == edx && temp == eax);
#else
 MOZ_ASSERT(result != temp);
#endif

 // The denominator isn't a power of 2 (see LDivPowTwoI and LModPowTwoI).
 MOZ_ASSERT(!mozilla::IsPowerOfTwo(d));

 auto rmc = ReciprocalMulConstants::computeUnsignedDivisionConstants(d);

 // We first compute (M * n) >> 32, where M = rmc.multiplier.
#ifdef JS_CODEGEN_X86
 masm.movl(Imm32(rmc.multiplier), eax);
 masm.umull(lhs);
#else
 // Zero-extend |lhs| in preparation for a 64-bit multiplication.
 masm.movl(lhs, result);

 // Note that imul sign-extends its 32-bit immediate, but we need an unsigned
 // multiplication.
 if (int32_t(rmc.multiplier) >= 0) {
   masm.imulq(Imm32(rmc.multiplier), result, result);
 } else {
   // Load the multiplier into a register first so the full 32-bit value
   // participates in the multiply without sign-extension.
   masm.movl(Imm32(rmc.multiplier), temp);
   masm.imulq(temp, result);
 }
 if (rmc.multiplier > UINT32_MAX || rmc.shiftAmount == 0) {
   masm.shrq(Imm32(32), result);
 }
#endif
 if (rmc.multiplier > UINT32_MAX) {
   // M >= 2^32 and shift == 0 is impossible, as d >= 2 implies that
   // ((M * n) >> (32 + shift)) >= n > floor(n/d) whenever n >= d,
   // contradicting the proof of correctness in computeDivisionConstants.
   MOZ_ASSERT(rmc.shiftAmount > 0);
   MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 33));

   // We actually computed result = ((uint32_t(M) * n) >> 32) instead. Since
   // (M * n) >> (32 + shift) is the same as (result + n) >> shift, we can
   // correct for the overflow. This case is a bit trickier than the signed
   // case, though, as the (result + n) addition itself can overflow; however,
   // note that
   // (result + n) >> shift == (((n - result) >> 1) + result) >> (shift - 1),
   // which is overflow-free. See Hacker's Delight, section 10-8 for details.

   // Compute (n - result) >> 1 into temp.
   masm.movl(lhs, temp);
   masm.subl(result, temp);
   masm.shrl(Imm32(1), temp);

   // Finish the computation.
   masm.addl(temp, result);
   if (rmc.shiftAmount > 1) {
     masm.shrl(Imm32(rmc.shiftAmount - 1), result);
   }
 } else {
   if (rmc.shiftAmount > 0) {
#ifdef JS_CODEGEN_X86
     // The high word of the product already lives in |result| (edx);
     // apply the post-shift in place.
     masm.shrl(Imm32(rmc.shiftAmount), result);
#else
     // The full 64-bit product is still in |result|; one shift extracts
     // the high word and applies the post-shift at the same time.
     masm.shrq(Imm32(32 + rmc.shiftAmount), result);
#endif
   }
 }
}
   1063 
   1064 void CodeGenerator::visitUDivConstant(LUDivConstant* ins) {
   1065  Register lhs = ToRegister(ins->numerator());
   1066  Register output = ToRegister(ins->output());
   1067  Register temp = ToRegister(ins->temp0());
   1068  uint32_t d = ins->denominator();
   1069 
   1070  MDiv* mir = ins->mir();
   1071 
   1072 #ifdef JS_CODEGEN_X86
   1073  // This emits the division answer into edx.
   1074  MOZ_ASSERT(output == edx);
   1075  MOZ_ASSERT(temp == eax);
   1076 #endif
   1077 
   1078  if (d == 0) {
   1079    if (mir->trapOnError()) {
   1080      masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->trapSiteDesc());
   1081    } else if (mir->isTruncated()) {
   1082      masm.xorl(output, output);
   1083    } else {
   1084      bailout(ins->snapshot());
   1085    }
   1086    return;
   1087  }
   1088 
   1089  // Compute the truncated division result in |output|.
   1090  UnsignedDivideWithConstant(masm, ins, output, temp);
   1091 
   1092  if (!mir->isTruncated()) {
   1093    masm.imull(Imm32(d), output, temp);
   1094    bailoutCmp32(Assembler::NotEqual, lhs, temp, ins->snapshot());
   1095  }
   1096 }
   1097 
   1098 void CodeGenerator::visitUModConstant(LUModConstant* ins) {
   1099  Register lhs = ToRegister(ins->numerator());
   1100  Register output = ToRegister(ins->output());
   1101  Register temp = ToRegister(ins->temp0());
   1102  uint32_t d = ins->denominator();
   1103 
   1104  MMod* mir = ins->mir();
   1105 
   1106 #ifdef JS_CODEGEN_X86
   1107  // This emits the modulus answer into eax.
   1108  MOZ_ASSERT(output == eax);
   1109  MOZ_ASSERT(temp == edx);
   1110 #endif
   1111 
   1112  if (d == 0) {
   1113    if (mir->trapOnError()) {
   1114      masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->trapSiteDesc());
   1115    } else if (mir->isTruncated()) {
   1116      masm.xorl(output, output);
   1117    } else {
   1118      bailout(ins->snapshot());
   1119    }
   1120    return;
   1121  }
   1122 
   1123  // Compute the truncated division result in |temp|.
   1124  UnsignedDivideWithConstant(masm, ins, temp, output);
   1125 
   1126  // We now have the truncated division value in |temp|. If we're computing a
   1127  // modulus or checking whether the division resulted in an integer, we need
   1128  // to multiply the obtained value by d and finish the computation/check.
   1129  //
   1130  // output = lhs - d * temp
   1131  masm.imull(Imm32(d), temp, temp);
   1132  masm.movl(lhs, output);
   1133  masm.subl(temp, output);
   1134 
   1135  // The final result of the modulus op, just computed above by the
   1136  // sub instruction, can be a number in the range [2^31, 2^32). If
   1137  // this is the case and the modulus is not truncated, we must bail
   1138  // out.
   1139  if (!mir->isTruncated()) {
   1140    bailoutIf(Assembler::Signed, ins->snapshot());
   1141  }
   1142 }
   1143 
// Division by a power-of-two constant (|divisor| == 2^shift, possibly
// negated), lowered to shift instructions instead of idiv. The numerator
// register doubles as the output (defineReuseInput).
void CodeGenerator::visitDivPowTwoI(LDivPowTwoI* ins) {
 Register lhs = ToRegister(ins->numerator());
 DebugOnly<Register> output = ToRegister(ins->output());

 int32_t shift = ins->shift();
 bool negativeDivisor = ins->negativeDivisor();
 MDiv* mir = ins->mir();

 // We use defineReuseInput so these should always be the same, which is
 // convenient since all of our instructions here are two-address.
 MOZ_ASSERT(lhs == output);

 if (!mir->isTruncated() && negativeDivisor) {
   // 0 divided by a negative number must return a double.
   bailoutTest32(Assembler::Zero, lhs, lhs, ins->snapshot());
 }

 if (shift) {
   if (!mir->isTruncated()) {
     // If the remainder is != 0, bailout since this must be a double.
     // The mask covers the low |shift| bits of the numerator.
     bailoutTest32(Assembler::NonZero, lhs, Imm32(UINT32_MAX >> (32 - shift)),
                   ins->snapshot());
   }

   if (mir->isUnsigned()) {
     // Unsigned division by 2^shift is a plain logical shift.
     masm.shrl(Imm32(shift), lhs);
   } else {
     // Adjust the value so that shifting produces a correctly
     // rounded result when the numerator is negative. See 10-1
     // "Signed Division by a Known Power of 2" in Henry
     // S. Warren, Jr.'s Hacker's Delight.
     if (mir->canBeNegativeDividend() && mir->isTruncated()) {
       // Note: There is no need to execute this code, which handles how to
       // round the signed integer division towards 0, if we previously bailed
       // due to a non-zero remainder.
       Register lhsCopy = ToRegister(ins->numeratorCopy());
       MOZ_ASSERT(lhsCopy != lhs);
       if (shift > 1) {
         // Copy the sign bit of the numerator. (= (2^32 - 1) or 0)
         masm.sarl(Imm32(31), lhs);
       }
       // Divide by 2^(32 - shift)
       // i.e. (= (2^32 - 1) / 2^(32 - shift) or 0)
       // i.e. (= (2^shift - 1) or 0)
       masm.shrl(Imm32(32 - shift), lhs);
       // If signed, make any 1 bit below the shifted bits to bubble up, such
       // that once shifted the value would be rounded towards 0.
       masm.addl(lhsCopy, lhs);
     }
     masm.sarl(Imm32(shift), lhs);

     if (negativeDivisor) {
       // Dividing by -2^shift: negate the shifted quotient.
       masm.negl(lhs);
     }
   }
   return;
 }

 // shift == 0: the divisor is -1 (negativeDivisor) or 1.
 if (negativeDivisor) {
   // INT32_MIN / -1 overflows.
   masm.negl(lhs);
   if (!mir->isTruncated()) {
     bailoutIf(Assembler::Overflow, ins->snapshot());
   } else if (mir->trapOnError()) {
     // Wasm semantics: overflow traps rather than bailing out.
     Label ok;
     masm.j(Assembler::NoOverflow, &ok);
     masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->trapSiteDesc());
     masm.bind(&ok);
   }
 } else if (mir->isUnsigned() && !mir->isTruncated()) {
   // Unsigned division by 1 can overflow if output is not truncated.
   bailoutTest32(Assembler::Signed, lhs, lhs, ins->snapshot());
 }
}
   1218 
// Emit the truncated signed division of |ins->numerator()| by the constant
// |ins->denominator()| (whose absolute value is not a power of two) using
// a reciprocal multiplication (see ReciprocalMulConstants and Hacker's
// Delight, chapter 10). The quotient is left in |result|; |temp| may be
// clobbered. Shared by LDivConstantI and LModConstantI.
template <class LDivOrMod>
static void DivideWithConstant(MacroAssembler& masm, LDivOrMod* ins,
                              Register result, Register temp) {
 Register lhs = ToRegister(ins->numerator());
 int32_t d = ins->denominator();

 MOZ_ASSERT(lhs != result && lhs != temp);
#ifdef JS_CODEGEN_X86
 // On x86-32 the one-operand imull below implicitly uses edx:eax.
 MOZ_ASSERT(result == edx && temp == eax);
#else
 MOZ_ASSERT(result != temp);
#endif

 // The absolute value of the denominator isn't a power of 2 (see LDivPowTwoI
 // and LModPowTwoI).
 MOZ_ASSERT(!mozilla::IsPowerOfTwo(mozilla::Abs(d)));

 auto* mir = ins->mir();

 // We will first divide by Abs(d), and negate the answer if d is negative.
 // If desired, this can be avoided by generalizing computeDivisionConstants.
 auto rmc = ReciprocalMulConstants::computeSignedDivisionConstants(d);

 // We first compute (M * n) >> 32, where M = rmc.multiplier.
#ifdef JS_CODEGEN_X86
 masm.movl(Imm32(rmc.multiplier), eax);
 masm.imull(lhs);
#else
 // Sign-extend |lhs| in preparation for a 64-bit multiplication.
 masm.movslq(lhs, result);
 masm.imulq(Imm32(rmc.multiplier), result, result);
 if (rmc.multiplier > INT32_MAX || rmc.shiftAmount == 0) {
   masm.shrq(Imm32(32), result);
 }
#endif
 if (rmc.multiplier > INT32_MAX) {
   MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 32));

   // We actually computed result = ((int32_t(M) * n) >> 32) instead. Since
   // (M * n) >> 32 is the same as (result + n), we can correct for the
   // overflow. (result + n) can't overflow, as n and |result| have opposite
   // signs because int32_t(M) is negative.
   masm.addl(lhs, result);
 }
 // (M * n) >> (32 + shift) is the truncated division answer if n is
 // non-negative, as proved in the comments of computeDivisionConstants. We
 // must add 1 later if n is negative to get the right answer in all cases.
 if (rmc.shiftAmount > 0) {
#ifdef JS_CODEGEN_X86
   masm.sarl(Imm32(rmc.shiftAmount), result);
#else
   if (rmc.multiplier > INT32_MAX) {
     // The high word was already extracted by the shrq above.
     masm.sarl(Imm32(rmc.shiftAmount), result);
   } else {
     // The full 64-bit product is still in |result|; one shift extracts
     // the high word and applies the post-shift at the same time.
     masm.sarq(Imm32(32 + rmc.shiftAmount), result);
   }
#endif
 }

 // We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be
 // computed with just a sign-extending shift of 31 bits.
 if (mir->canBeNegativeDividend()) {
   masm.movl(lhs, temp);
   masm.sarl(Imm32(31), temp);
   masm.subl(temp, result);
 }

 // After this, |result| contains the correct truncated division result.
 if (d < 0) {
   masm.negl(result);
 }
}
   1291 
   1292 void CodeGenerator::visitDivConstantI(LDivConstantI* ins) {
   1293  Register lhs = ToRegister(ins->numerator());
   1294  Register output = ToRegister(ins->output());
   1295  Register temp = ToRegister(ins->temp0());
   1296  int32_t d = ins->denominator();
   1297 
   1298  MDiv* mir = ins->mir();
   1299 
   1300 #ifdef JS_CODEGEN_X86
   1301  // This emits the division answer into edx.
   1302  MOZ_ASSERT(output == edx);
   1303  MOZ_ASSERT(temp == eax);
   1304 #endif
   1305 
   1306  if (d == 0) {
   1307    if (mir->trapOnError()) {
   1308      masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->trapSiteDesc());
   1309    } else if (mir->isTruncated()) {
   1310      masm.xorl(output, output);
   1311    } else {
   1312      bailout(ins->snapshot());
   1313    }
   1314    return;
   1315  }
   1316 
   1317  // Compute the truncated division result in |output|.
   1318  DivideWithConstant(masm, ins, output, temp);
   1319 
   1320  if (!mir->isTruncated()) {
   1321    // This is a division op. Multiply the obtained value by d to check if
   1322    // the correct answer is an integer. This cannot overflow, since |d| > 1.
   1323    masm.imull(Imm32(d), output, temp);
   1324    bailoutCmp32(Assembler::NotEqual, lhs, temp, ins->snapshot());
   1325 
   1326    // If lhs is zero and the divisor is negative, the answer should have
   1327    // been -0.
   1328    if (d < 0) {
   1329      bailoutTest32(Assembler::Zero, lhs, lhs, ins->snapshot());
   1330    }
   1331  }
   1332 }
   1333 
   1334 void CodeGenerator::visitModConstantI(LModConstantI* ins) {
   1335  Register lhs = ToRegister(ins->numerator());
   1336  Register output = ToRegister(ins->output());
   1337  Register temp = ToRegister(ins->temp0());
   1338  int32_t d = ins->denominator();
   1339 
   1340  MMod* mir = ins->mir();
   1341 
   1342 #ifdef JS_CODEGEN_X86
   1343  // This emits the modulus answer into eax.
   1344  MOZ_ASSERT(output == eax);
   1345  MOZ_ASSERT(temp == edx);
   1346 #endif
   1347 
   1348  if (d == 0) {
   1349    if (mir->trapOnError()) {
   1350      masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->trapSiteDesc());
   1351    } else if (mir->isTruncated()) {
   1352      masm.xorl(output, output);
   1353    } else {
   1354      bailout(ins->snapshot());
   1355    }
   1356    return;
   1357  }
   1358 
   1359  // Compute the truncated division result in |temp|.
   1360  DivideWithConstant(masm, ins, temp, output);
   1361 
   1362  // Compute the remainder in |output|: output = lhs - d * temp
   1363  masm.imull(Imm32(-d), temp, output);
   1364  masm.addl(lhs, output);
   1365 
   1366  if (!mir->isTruncated() && mir->canBeNegativeDividend()) {
   1367    // This is a mod op. If the computed value is zero and lhs
   1368    // is negative, the answer should have been -0.
   1369    Label done;
   1370    masm.branch32(Assembler::GreaterThanOrEqual, lhs, Imm32(0), &done);
   1371    bailoutTest32(Assembler::Zero, output, output, ins->snapshot());
   1372    masm.bind(&done);
   1373  }
   1374 }
   1375 
// Signed 32-bit division via idiv. Register use is fixed: dividend in
// eax (lhs/output), remainder in edx (temp0).
void CodeGenerator::visitDivI(LDivI* ins) {
 Register remainder = ToRegister(ins->temp0());
 Register lhs = ToRegister(ins->lhs());
 Register rhs = ToRegister(ins->rhs());
 Register output = ToRegister(ins->output());

 // idiv implicitly consumes (edx:eax) and writes eax/edx.
 MOZ_ASSERT(lhs == eax);
 MOZ_ASSERT(rhs != eax);
 MOZ_ASSERT(rhs != edx);
 MOZ_ASSERT(remainder == edx);
 MOZ_ASSERT(output == eax);

 MDiv* mir = ins->mir();

 Label done;
 OutOfLineCode* ool = nullptr;

 // Handle divide by zero.
 if (mir->canBeDivideByZero()) {
   if (mir->trapOnError()) {
     TrapIfDivideByZero(masm, ins, rhs);
   } else if (mir->canTruncateInfinities()) {
     // Truncated division by zero is zero; handled out-of-line.
     ool = emitOutOfLineZeroForDivideByZero(rhs, output);
   } else {
     MOZ_ASSERT(mir->fallible());
     bailoutTest32(Assembler::Zero, rhs, rhs, ins->snapshot());
   }
 }

 // Handle an integer overflow exception from -2147483648 / -1.
 if (mir->canBeNegativeOverflow()) {
   Label notOverflow;
   masm.branch32(Assembler::NotEqual, lhs, Imm32(INT32_MIN), &notOverflow);
   if (mir->trapOnError()) {
     // Wasm semantics: INT32_MIN / -1 traps.
     masm.branch32(Assembler::NotEqual, rhs, Imm32(-1), &notOverflow);
     masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->trapSiteDesc());
   } else if (mir->canTruncateOverflow()) {
     // (-INT32_MIN)|0 == INT32_MIN and INT32_MIN is already in the
     // output register (lhs == eax).
     masm.branch32(Assembler::Equal, rhs, Imm32(-1), &done);
   } else {
     MOZ_ASSERT(mir->fallible());
     bailoutCmp32(Assembler::Equal, rhs, Imm32(-1), ins->snapshot());
   }
   masm.bind(&notOverflow);
 }

 // Handle negative 0: 0 / negative yields -0, which needs a double.
 if (!mir->canTruncateNegativeZero() && mir->canBeNegativeZero()) {
   Label nonzero;
   masm.branchTest32(Assembler::NonZero, lhs, lhs, &nonzero);
   bailoutCmp32(Assembler::LessThan, rhs, Imm32(0), ins->snapshot());
   masm.bind(&nonzero);
 }

 // Sign extend the lhs into edx to make (edx:eax), since idiv is 64-bit.
 masm.cdq();
 masm.idiv(rhs);

 if (!mir->canTruncateRemainder()) {
   // If the remainder is > 0, bailout since this must be a double.
   bailoutTest32(Assembler::NonZero, remainder, remainder, ins->snapshot());
 }

 masm.bind(&done);

 if (ool) {
   addOutOfLineCode(ool, mir);
   masm.bind(ool->rejoin());
 }
}
   1447 
// Modulus by a power-of-two constant (2^shift), lowered to a bit-mask.
// Negative dividends are negated around the mask so the result keeps the
// dividend's sign (truncated/C-style modulus).
void CodeGenerator::visitModPowTwoI(LModPowTwoI* ins) {
 Register lhs = ToRegister(ins->input());
 int32_t shift = ins->shift();
 bool canBeNegative =
     !ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend();

 if (shift == 0) {
   // Modulus by 1: the result is always zero.
   if (canBeNegative && !ins->mir()->isTruncated()) {
     // A negative dividend would make the correct answer -0; bail out.
     bailoutTest32(Assembler::Signed, lhs, lhs, ins->snapshot());
   }
   masm.xorl(lhs, lhs);
   return;
 }

 // Mask off all but the low |shift| bits of lhs, using a zero-extending
 // move when the shift matches a sub-register width.
 auto clearHighBits = [&]() {
   switch (shift) {
     case 16:
       masm.movzwl(lhs, lhs);
       break;
     case 8:
       // movzbl needs a byte-addressable register (x86-32 restriction).
       if (AllocatableGeneralRegisterSet(Registers::SingleByteRegs).has(lhs)) {
         masm.movzbl(lhs, lhs);
         break;
       }
       [[fallthrough]];
     default:
       masm.andl(Imm32((uint32_t(1) << shift) - 1), lhs);
       break;
   }
 };

 Label negative;

 if (canBeNegative) {
   // Switch based on sign of the lhs.
   // Positive numbers are just a bitmask
   masm.branchTest32(Assembler::Signed, lhs, lhs, &negative);
 }

 clearHighBits();

 if (canBeNegative) {
   Label done;
   masm.jump(&done);

   // Negative numbers need a negate, bitmask, negate
   masm.bind(&negative);

   // Unlike in the visitModI case, we are not computing the mod by means of a
   // division. Therefore, the divisor = -1 case isn't problematic (the andl
   // always returns 0, which is what we expect).
   //
   // The negl instruction overflows if lhs == INT32_MIN, but this is also not
   // a problem: shift is at most 31, and so the andl also always returns 0.
   masm.negl(lhs);
   clearHighBits();
   masm.negl(lhs);

   // Since a%b has the same sign as a, and a is negative in this branch,
   // an answer of 0 means the correct result is actually -0. Bail out.
   if (!ins->mir()->isTruncated()) {
     bailoutIf(Assembler::Zero, ins->snapshot());
   }
   masm.bind(&done);
 }
}
   1514 
   1515 class ModOverflowCheck : public OutOfLineCodeBase<CodeGeneratorX86Shared> {
   1516  Label done_;
   1517  LModI* ins_;
   1518  Register rhs_;
   1519 
   1520 public:
   1521  explicit ModOverflowCheck(LModI* ins, Register rhs) : ins_(ins), rhs_(rhs) {}
   1522 
   1523  virtual void accept(CodeGeneratorX86Shared* codegen) override {
   1524    codegen->visitModOverflowCheck(this);
   1525  }
   1526  Label* done() { return &done_; }
   1527  LModI* ins() const { return ins_; }
   1528  Register rhs() const { return rhs_; }
   1529 };
   1530 
   1531 void CodeGeneratorX86Shared::visitModOverflowCheck(ModOverflowCheck* ool) {
   1532  masm.cmp32(ool->rhs(), Imm32(-1));
   1533  if (ool->ins()->mir()->isTruncated()) {
   1534    masm.j(Assembler::NotEqual, ool->rejoin());
   1535    masm.mov(ImmWord(0), edx);
   1536    masm.jmp(ool->done());
   1537  } else {
   1538    bailoutIf(Assembler::Equal, ool->ins()->snapshot());
   1539    masm.jmp(ool->rejoin());
   1540  }
   1541 }
   1542 
// Signed 32-bit modulus via idiv. Register use is fixed: dividend in eax
// (also temp0), remainder in edx (output). Includes a power-of-two
// divisor fast path for non-negative dividends and an out-of-line check
// for the INT32_MIN dividend case.
void CodeGenerator::visitModI(LModI* ins) {
 Register remainder = ToRegister(ins->output());
 Register lhs = ToRegister(ins->lhs());
 Register rhs = ToRegister(ins->rhs());

 // Required to use idiv.
 MOZ_ASSERT(lhs == eax);
 MOZ_ASSERT(rhs != eax);
 MOZ_ASSERT(rhs != edx);
 MOZ_ASSERT(remainder == edx);
 MOZ_ASSERT(ToRegister(ins->temp0()) == eax);

 MMod* mir = ins->mir();

 Label done;
 OutOfLineCode* ool = nullptr;
 ModOverflowCheck* overflow = nullptr;

 // Prevent divide by zero.
 if (mir->canBeDivideByZero()) {
   if (mir->trapOnError()) {
     TrapIfDivideByZero(masm, ins, rhs);
   } else if (mir->isTruncated()) {
     // Truncated modulus with a zero divisor is zero; handled out-of-line.
     ool = emitOutOfLineZeroForDivideByZero(rhs, remainder);
   } else {
     MOZ_ASSERT(mir->fallible());
     bailoutTest32(Assembler::Zero, rhs, rhs, ins->snapshot());
   }
 }

 Label negative;

 // Switch based on sign of the lhs.
 if (mir->canBeNegativeDividend()) {
   masm.branchTest32(Assembler::Signed, lhs, lhs, &negative);
 }

 // If lhs >= 0 then remainder = lhs % rhs. The remainder must be positive.
 {
   // Check if rhs is a power-of-two.
   if (mir->canBePowerOfTwoDivisor()) {
     MOZ_ASSERT(rhs != remainder);

     // Rhs y is a power-of-two if (y & (y-1)) == 0. Note that if
     // y is any negative number other than INT32_MIN, both y and
     // y-1 will have the sign bit set so these are never optimized
     // as powers-of-two. If y is INT32_MIN, y-1 will be INT32_MAX
     // and because lhs >= 0 at this point, lhs & INT32_MAX returns
     // the correct value.
     Label notPowerOfTwo;
     masm.mov(rhs, remainder);
     masm.subl(Imm32(1), remainder);
     masm.branchTest32(Assembler::NonZero, remainder, rhs, &notPowerOfTwo);
     {
       // Power-of-two divisor: mask instead of dividing.
       masm.andl(lhs, remainder);
       masm.jmp(&done);
     }
     masm.bind(&notPowerOfTwo);
   }

   // Since lhs >= 0, the sign-extension will be 0
   masm.mov(ImmWord(0), edx);
   masm.idiv(rhs);
 }

 // Otherwise, we have to beware of two special cases:
 if (mir->canBeNegativeDividend()) {
   masm.jump(&done);

   masm.bind(&negative);

   // Prevent an integer overflow exception from -2147483648 % -1
   overflow = new (alloc()) ModOverflowCheck(ins, rhs);
   masm.branch32(Assembler::Equal, lhs, Imm32(INT32_MIN), overflow->entry());
   masm.bind(overflow->rejoin());

   // Sign-extend lhs into (edx:eax) and divide.
   masm.cdq();
   masm.idiv(rhs);

   if (!mir->isTruncated()) {
     // A remainder of 0 means that the rval must be -0, which is a double.
     bailoutTest32(Assembler::Zero, remainder, remainder, ins->snapshot());
   }
 }

 masm.bind(&done);

 if (overflow) {
   addOutOfLineCode(overflow, mir);
   masm.bind(overflow->done());
 }

 if (ool) {
   addOutOfLineCode(ool, mir);
   masm.bind(ool->rejoin());
 }
}
   1640 
   1641 void CodeGenerator::visitBitNotI(LBitNotI* ins) {
   1642  Register input = ToRegister(ins->input());
   1643  MOZ_ASSERT(input == ToRegister(ins->output()));
   1644 
   1645  masm.notl(input);
   1646 }
   1647 
   1648 void CodeGenerator::visitBitOpI(LBitOpI* ins) {
   1649  Register lhs = ToRegister(ins->lhs());
   1650  const LAllocation* rhs = ins->rhs();
   1651 
   1652  MOZ_ASSERT(lhs == ToRegister(ins->output()));
   1653 
   1654  switch (ins->bitop()) {
   1655    case JSOp::BitOr:
   1656      if (rhs->isConstant()) {
   1657        masm.orl(Imm32(ToInt32(rhs)), lhs);
   1658      } else {
   1659        masm.orl(ToOperand(rhs), lhs);
   1660      }
   1661      break;
   1662    case JSOp::BitXor:
   1663      if (rhs->isConstant()) {
   1664        masm.xorl(Imm32(ToInt32(rhs)), lhs);
   1665      } else {
   1666        masm.xorl(ToOperand(rhs), lhs);
   1667      }
   1668      break;
   1669    case JSOp::BitAnd:
   1670      if (rhs->isConstant()) {
   1671        masm.andl(Imm32(ToInt32(rhs)), lhs);
   1672      } else {
   1673        masm.andl(ToOperand(rhs), lhs);
   1674      }
   1675      break;
   1676    default:
   1677      MOZ_CRASH("unexpected binary opcode");
   1678  }
   1679 }
   1680 
   1681 void CodeGenerator::visitBitOpI64(LBitOpI64* lir) {
   1682  Register64 lhs = ToRegister64(lir->lhs());
   1683  LInt64Allocation rhs = lir->rhs();
   1684 
   1685  MOZ_ASSERT(ToOutRegister64(lir) == lhs);
   1686 
   1687  switch (lir->bitop()) {
   1688    case JSOp::BitOr:
   1689      if (IsConstant(rhs)) {
   1690        masm.or64(Imm64(ToInt64(rhs)), lhs);
   1691      } else {
   1692        masm.or64(ToOperandOrRegister64(rhs), lhs);
   1693      }
   1694      break;
   1695    case JSOp::BitXor:
   1696      if (IsConstant(rhs)) {
   1697        masm.xor64(Imm64(ToInt64(rhs)), lhs);
   1698      } else {
   1699        masm.xor64(ToOperandOrRegister64(rhs), lhs);
   1700      }
   1701      break;
   1702    case JSOp::BitAnd:
   1703      if (IsConstant(rhs)) {
   1704        masm.and64(Imm64(ToInt64(rhs)), lhs);
   1705      } else {
   1706        masm.and64(ToOperandOrRegister64(rhs), lhs);
   1707      }
   1708      break;
   1709    default:
   1710      MOZ_CRASH("unexpected binary opcode");
   1711  }
   1712 }
   1713 
// Emit a 32-bit shift (<<, >>, >>>). For a constant shift count the count is
// masked to 5 bits (matching JS semantics) and the shift is done in place.
// For a variable count, either the classic two-address shift (count in the
// fixed ecx register, handled by the masm helpers) or the BMI2 three-operand
// forms (shlx/sarx/shrx) are used; out != lhs is only legal when BMI2 is
// available. An unsigned shift whose result could exceed INT32_MAX bails out
// (the MIR marks it fallible), since the result must fit in an Int32 value.
void CodeGenerator::visitShiftI(LShiftI* ins) {
  Register lhs = ToRegister(ins->lhs());
  const LAllocation* rhs = ins->rhs();
  Register out = ToRegister(ins->output());

  if (rhs->isConstant()) {
    // Constant count: the shift is performed in place on lhs (== out).
    MOZ_ASSERT(out == lhs);

    int32_t shift = ToInt32(rhs) & 0x1F;
    switch (ins->bitop()) {
      case JSOp::Lsh:
        if (shift) {
          masm.lshift32(Imm32(shift), lhs);
        }
        break;
      case JSOp::Rsh:
        if (shift) {
          masm.rshift32Arithmetic(Imm32(shift), lhs);
        }
        break;
      case JSOp::Ursh:
        if (shift) {
          masm.rshift32(Imm32(shift), lhs);
        } else if (ins->mir()->toUrsh()->fallible()) {
          // x >>> 0 can overflow.
          masm.test32(lhs, lhs);
          bailoutIf(Assembler::Signed, ins->snapshot());
        }
        break;
      default:
        MOZ_CRASH("Unexpected shift op");
    }
  } else {
    Register shift = ToRegister(rhs);
    // A distinct output register is only possible with the BMI2
    // three-operand shift instructions.
    MOZ_ASSERT_IF(out != lhs, Assembler::HasBMI2());

    switch (ins->bitop()) {
      case JSOp::Lsh:
        if (out != lhs) {
          masm.shlxl(lhs, shift, out);
        } else {
          masm.lshift32(shift, lhs);
        }
        break;
      case JSOp::Rsh:
        if (out != lhs) {
          masm.sarxl(lhs, shift, out);
        } else {
          masm.rshift32Arithmetic(shift, lhs);
        }
        break;
      case JSOp::Ursh:
        if (out != lhs) {
          masm.shrxl(lhs, shift, out);
        } else {
          masm.rshift32(shift, lhs);
        }
        if (ins->mir()->toUrsh()->fallible()) {
          // x >>> 0 can overflow.
          masm.test32(out, out);
          bailoutIf(Assembler::Signed, ins->snapshot());
        }
        break;
      default:
        MOZ_CRASH("Unexpected shift op");
    }
  }
}
   1782 
// Emit (lhs >>> rhs) producing a double. The 32-bit unsigned shift result is
// computed into `temp` (which aliases lhs except in the BMI2 shrx case) and
// then converted to a double, so values above INT32_MAX are representable
// without bailing out.
void CodeGenerator::visitUrshD(LUrshD* ins) {
  Register lhs = ToRegister(ins->lhs());
  const LAllocation* rhs = ins->rhs();
  FloatRegister out = ToFloatRegister(ins->output());
  Register temp = ToRegister(ins->temp0());

  if (rhs->isConstant()) {
    // Constant count: shift lhs in place (temp must alias it).
    MOZ_ASSERT(temp == lhs);

    int32_t shift = ToInt32(rhs) & 0x1F;
    if (shift) {
      masm.shrl(Imm32(shift), lhs);
    }
  } else {
    // A separate temp register is only possible with the BMI2 shrx form.
    MOZ_ASSERT_IF(temp != lhs, Assembler::HasBMI2());

    Register shift = ToRegister(rhs);
    if (temp != lhs) {
      masm.shrxl(lhs, shift, temp);
    } else {
      masm.rshift32(shift, lhs);
    }
  }

  // The shifted value is interpreted as unsigned when converting.
  masm.convertUInt32ToDouble(temp, out);
}
   1809 
   1810 Operand CodeGeneratorX86Shared::ToOperand(const LAllocation& a) {
   1811  if (a.isGeneralReg()) {
   1812    return Operand(a.toGeneralReg()->reg());
   1813  }
   1814  if (a.isFloatReg()) {
   1815    return Operand(a.toFloatReg()->reg());
   1816  }
   1817  return Operand(ToAddress(a));
   1818 }
   1819 
   1820 Operand CodeGeneratorX86Shared::ToOperand(const LAllocation* a) {
   1821  return ToOperand(*a);
   1822 }
   1823 
   1824 MoveOperand CodeGeneratorX86Shared::toMoveOperand(LAllocation a) const {
   1825  if (a.isGeneralReg()) {
   1826    return MoveOperand(ToRegister(a));
   1827  }
   1828  if (a.isFloatReg()) {
   1829    return MoveOperand(ToFloatRegister(a));
   1830  }
   1831  MoveOperand::Kind kind = a.isStackArea() ? MoveOperand::Kind::EffectiveAddress
   1832                                           : MoveOperand::Kind::Memory;
   1833  return MoveOperand(ToAddress(a), kind);
   1834 }
   1835 
// Out-of-line helper holding the jump table for a table switch. The table
// itself (one code pointer per case) is emitted out of line once all case
// labels have known offsets; `jumpLabel_` marks the table's start so the
// dispatch code can compute table[index].
class OutOfLineTableSwitch : public OutOfLineCodeBase<CodeGeneratorX86Shared> {
  MTableSwitch* mir_;
  CodeLabel jumpLabel_;

  void accept(CodeGeneratorX86Shared* codegen) override {
    codegen->visitOutOfLineTableSwitch(this);
  }

 public:
  explicit OutOfLineTableSwitch(MTableSwitch* mir) : mir_(mir) {}

  // The MIR node describing the switch (cases, default, low bound).
  MTableSwitch* mir() const { return mir_; }

  // Label bound at the start of the emitted jump table.
  CodeLabel* jumpLabel() { return &jumpLabel_; }
};
   1851 
// Emit the jump table for a table switch: one absolute code pointer per
// case, pointer-aligned. Runs after the case bodies have been emitted, so
// every case label already has an offset.
void CodeGeneratorX86Shared::visitOutOfLineTableSwitch(
    OutOfLineTableSwitch* ool) {
  MTableSwitch* mir = ool->mir();

  // Align the table and bind the label the dispatch code indexes from.
  masm.haltingAlign(sizeof(void*));
  masm.bind(ool->jumpLabel());
  masm.addCodeLabel(*ool->jumpLabel());

  for (size_t i = 0; i < mir->numCases(); i++) {
    LBlock* caseblock = skipTrivialBlocks(mir->getCase(i))->lir();
    Label* caseheader = caseblock->label();
    uint32_t caseoffset = caseheader->offset();

    // The entries of the jump table need to be absolute addresses and thus
    // must be patched after codegen is finished.
    CodeLabel cl;
    masm.writeCodePointer(&cl);
    cl.target()->bind(caseoffset);
    masm.addCodeLabel(cl);
  }
}
   1873 
// Emit the dispatch sequence for a table switch: normalize the index,
// range-check it against the number of cases, then do an indirect jump
// through the out-of-line jump table. `index` is clobbered; `base` is a
// scratch register used to hold the table address.
void CodeGeneratorX86Shared::emitTableSwitchDispatch(MTableSwitch* mir,
                                                     Register index,
                                                     Register base) {
  Label* defaultcase = skipTrivialBlocks(mir->getDefault())->lir()->label();

  // Lower value with low value
  if (mir->low() != 0) {
    masm.subl(Imm32(mir->low()), index);
  }

  // Jump to default case if input is out of range
  // (An unsigned AboveOrEqual compare catches both index >= cases and
  // negative indexes, which wrap to large unsigned values.)
  int32_t cases = mir->numCases();
  masm.cmp32(index, Imm32(cases));
  masm.j(AssemblerX86Shared::AboveOrEqual, defaultcase);

  // To fill in the CodeLabels for the case entries, we need to first
  // generate the case entries (we don't yet know their offsets in the
  // instruction stream).
  OutOfLineTableSwitch* ool = new (alloc()) OutOfLineTableSwitch(mir);
  addOutOfLineCode(ool, mir);

  // Compute the position where a pointer to the right case stands.
  masm.mov(ool->jumpLabel(), base);
  BaseIndex pointer(base, index, ScalePointer);

  // Jump to the right case
  masm.branchToComputedAddress(pointer);
}
   1902 
   1903 void CodeGenerator::visitMathD(LMathD* math) {
   1904  FloatRegister lhs = ToFloatRegister(math->lhs());
   1905  Operand rhs = ToOperand(math->rhs());
   1906  FloatRegister output = ToFloatRegister(math->output());
   1907 
   1908  switch (math->jsop()) {
   1909    case JSOp::Add:
   1910      masm.vaddsd(rhs, lhs, output);
   1911      break;
   1912    case JSOp::Sub:
   1913      masm.vsubsd(rhs, lhs, output);
   1914      break;
   1915    case JSOp::Mul:
   1916      masm.vmulsd(rhs, lhs, output);
   1917      break;
   1918    case JSOp::Div:
   1919      masm.vdivsd(rhs, lhs, output);
   1920      break;
   1921    default:
   1922      MOZ_CRASH("unexpected opcode");
   1923  }
   1924 }
   1925 
   1926 void CodeGenerator::visitMathF(LMathF* math) {
   1927  FloatRegister lhs = ToFloatRegister(math->lhs());
   1928  Operand rhs = ToOperand(math->rhs());
   1929  FloatRegister output = ToFloatRegister(math->output());
   1930 
   1931  switch (math->jsop()) {
   1932    case JSOp::Add:
   1933      masm.vaddss(rhs, lhs, output);
   1934      break;
   1935    case JSOp::Sub:
   1936      masm.vsubss(rhs, lhs, output);
   1937      break;
   1938    case JSOp::Mul:
   1939      masm.vmulss(rhs, lhs, output);
   1940      break;
   1941    case JSOp::Div:
   1942      masm.vdivss(rhs, lhs, output);
   1943      break;
   1944    default:
   1945      MOZ_CRASH("unexpected opcode");
   1946  }
   1947 }
   1948 
   1949 void CodeGenerator::visitEffectiveAddress3(LEffectiveAddress3* ins) {
   1950  const MEffectiveAddress3* mir = ins->mir();
   1951  Register base = ToRegister(ins->base());
   1952  Register index = ToRegister(ins->index());
   1953  Register output = ToRegister(ins->output());
   1954  // Regarding performance, we rely on the fact that, if `mir->displacement()`
   1955  // is zero, `masm` will generate a 2-addend `leal`, and not a 3-addend one
   1956  // with a zero constant, since that is slower on some processors.
   1957  // See comments in EffectiveAddressAnalysis.cpp.
   1958  masm.leal(Operand(base, index, mir->scale(), mir->displacement()), output);
   1959 }
   1960 
   1961 void CodeGenerator::visitEffectiveAddress2(LEffectiveAddress2* ins) {
   1962  const MEffectiveAddress2* mir = ins->mir();
   1963  Register index = ToRegister(ins->index());
   1964  Register output = ToRegister(ins->output());
   1965  masm.leal(Operand(index, mir->scale(), mir->displacement()), output);
   1966 }
   1967 
// Emit the invalidation epilogue: padding so OsiPoint patching cannot
// overwrite it, followed by a patchable push of the IonScript pointer and a
// jump into the invalidation trampoline.
void CodeGeneratorX86Shared::generateInvalidateEpilogue() {
  // Ensure that there is enough space in the buffer for the OsiPoint
  // patching to occur. Otherwise, we could overwrite the invalidation
  // epilogue.
  for (size_t i = 0; i < sizeof(void*); i += Assembler::NopSize()) {
    masm.nop();
  }

  masm.bind(&invalidate_);

  // Push the Ion script onto the stack (when we determine what that pointer
  // is).
  invalidateEpilogueData_ = masm.pushWithPatch(ImmWord(uintptr_t(-1)));

  // Jump to the invalidator which will replace the current frame.
  TrampolinePtr thunk = gen->jitRuntime()->getInvalidationThunk();
  masm.jump(thunk);
}
   1986 
   1987 void CodeGenerator::visitNegI(LNegI* ins) {
   1988  Register input = ToRegister(ins->input());
   1989  MOZ_ASSERT(input == ToRegister(ins->output()));
   1990 
   1991  masm.neg32(input);
   1992 }
   1993 
   1994 void CodeGenerator::visitNegI64(LNegI64* ins) {
   1995  Register64 input = ToRegister64(ins->input());
   1996  MOZ_ASSERT(input == ToOutRegister64(ins));
   1997  masm.neg64(input);
   1998 }
   1999 
   2000 void CodeGenerator::visitNegD(LNegD* ins) {
   2001  FloatRegister input = ToFloatRegister(ins->input());
   2002  FloatRegister output = ToFloatRegister(ins->output());
   2003 
   2004  // XOR the float in a float register with -0.0.
   2005  masm.vxorpdSimd128(SimdConstant::SplatX2(-0.0), input, output);
   2006 }
   2007 
   2008 void CodeGenerator::visitNegF(LNegF* ins) {
   2009  FloatRegister input = ToFloatRegister(ins->input());
   2010  FloatRegister output = ToFloatRegister(ins->output());
   2011 
   2012  // XOR the float in a float register with -0.0.
   2013  masm.vxorpsSimd128(SimdConstant::SplatX4(-0.0f), input, output);
   2014 }
   2015 
// Emit a sequentially-consistent compare-exchange on a typed-array element.
// `temp` may be InvalidReg when the element type needs no scratch register.
void CodeGenerator::visitCompareExchangeTypedArrayElement(
    LCompareExchangeTypedArrayElement* lir) {
  Register elements = ToRegister(lir->elements());
  AnyRegister output = ToAnyRegister(lir->output());
  Register temp = ToTempRegisterOrInvalid(lir->temp0());

  Register oldval = ToRegister(lir->oldval());
  Register newval = ToRegister(lir->newval());

  Scalar::Type arrayType = lir->mir()->arrayType();

  // The index may be constant (Address) or a register (BaseIndex); the
  // variant's match() dispatches to the right overload either way.
  auto dest = ToAddressOrBaseIndex(elements, lir->index(), arrayType);

  dest.match([&](const auto& dest) {
    masm.compareExchangeJS(arrayType, Synchronization::Full(), dest, oldval,
                           newval, temp, output);
  });
}
   2034 
// Emit a sequentially-consistent atomic exchange on a typed-array element.
// `temp` may be InvalidReg when the element type needs no scratch register.
void CodeGenerator::visitAtomicExchangeTypedArrayElement(
    LAtomicExchangeTypedArrayElement* lir) {
  Register elements = ToRegister(lir->elements());
  AnyRegister output = ToAnyRegister(lir->output());
  Register temp = ToTempRegisterOrInvalid(lir->temp0());

  Register value = ToRegister(lir->value());

  Scalar::Type arrayType = lir->mir()->arrayType();

  // Constant index -> Address, register index -> BaseIndex; match()
  // dispatches to the right overload.
  auto dest = ToAddressOrBaseIndex(elements, lir->index(), arrayType);

  dest.match([&](const auto& dest) {
    masm.atomicExchangeJS(arrayType, Synchronization::Full(), dest, value, temp,
                          output);
  });
}
   2052 
// Emit a sequentially-consistent atomic read-modify-write (add/sub/and/or/
// xor) on a typed-array element, producing the element's previous value.
// The value-used (non-ForEffect) case is handled here; the discard case has
// its own visitor below.
void CodeGenerator::visitAtomicTypedArrayElementBinop(
    LAtomicTypedArrayElementBinop* lir) {
  MOZ_ASSERT(!lir->mir()->isForEffect());

  AnyRegister output = ToAnyRegister(lir->output());
  Register elements = ToRegister(lir->elements());
  Register temp1 = ToTempRegisterOrInvalid(lir->temp0());
  Register temp2 = ToTempRegisterOrInvalid(lir->temp1());
  const LAllocation* value = lir->value();

  Scalar::Type arrayType = lir->mir()->arrayType();
  AtomicOp atomicOp = lir->mir()->operation();

  // Constant index -> Address, register index -> BaseIndex.
  auto mem = ToAddressOrBaseIndex(elements, lir->index(), arrayType);

  mem.match([&](const auto& mem) {
    // A constant operand lets the masm use an immediate form when possible.
    if (value->isConstant()) {
      masm.atomicFetchOpJS(arrayType, Synchronization::Full(), atomicOp,
                           Imm32(ToInt32(value)), mem, temp1, temp2, output);
    } else {
      masm.atomicFetchOpJS(arrayType, Synchronization::Full(), atomicOp,
                           ToRegister(value), mem, temp1, temp2, output);
    }
  });
}
   2078 
// Emit a sequentially-consistent atomic read-modify-write on a typed-array
// element whose result is discarded. Cheaper than the fetching form: no
// output register is needed (InvalidReg is passed for the temp).
void CodeGenerator::visitAtomicTypedArrayElementBinopForEffect(
    LAtomicTypedArrayElementBinopForEffect* lir) {
  MOZ_ASSERT(lir->mir()->isForEffect());

  Register elements = ToRegister(lir->elements());
  const LAllocation* value = lir->value();
  Scalar::Type arrayType = lir->mir()->arrayType();
  AtomicOp atomicOp = lir->mir()->operation();

  // Constant index -> Address, register index -> BaseIndex.
  auto mem = ToAddressOrBaseIndex(elements, lir->index(), arrayType);

  mem.match([&](const auto& mem) {
    // A constant operand lets the masm use an immediate form when possible.
    if (value->isConstant()) {
      masm.atomicEffectOpJS(arrayType, Synchronization::Full(), atomicOp,
                            Imm32(ToInt32(value)), mem, InvalidReg);
    } else {
      masm.atomicEffectOpJS(arrayType, Synchronization::Full(), atomicOp,
                            ToRegister(value), mem, InvalidReg);
    }
  });
}
   2100 
// Out-of-line slow path for wasm float->int truncation: reached when the
// inline truncation produced the sentinel value. Dispatches on the
// (fromType, toType) pair to the masm helper that either traps (invalid
// conversion/overflow) or materializes the saturated result and rejoins.
void CodeGeneratorX86Shared::visitOutOfLineWasmTruncateCheck(
    OutOfLineWasmTruncateCheck* ool) {
  FloatRegister input = ool->input();
  Register output = ool->output();
  Register64 output64 = ool->output64();
  MIRType fromType = ool->fromType();
  MIRType toType = ool->toType();
  Label* oolRejoin = ool->rejoin();
  TruncFlags flags = ool->flags();
  const wasm::TrapSiteDesc& trapSiteDesc = ool->trapSiteDesc();

  if (fromType == MIRType::Float32) {
    if (toType == MIRType::Int32) {
      masm.oolWasmTruncateCheckF32ToI32(input, output, flags, trapSiteDesc,
                                        oolRejoin);
    } else if (toType == MIRType::Int64) {
      masm.oolWasmTruncateCheckF32ToI64(input, output64, flags, trapSiteDesc,
                                        oolRejoin);
    } else {
      MOZ_CRASH("unexpected type");
    }
  } else if (fromType == MIRType::Double) {
    if (toType == MIRType::Int32) {
      masm.oolWasmTruncateCheckF64ToI32(input, output, flags, trapSiteDesc,
                                        oolRejoin);
    } else if (toType == MIRType::Int64) {
      masm.oolWasmTruncateCheckF64ToI64(input, output64, flags, trapSiteDesc,
                                        oolRejoin);
    } else {
      MOZ_CRASH("unexpected type");
    }
  } else {
    MOZ_CRASH("unexpected type");
  }
}
   2136 
   2137 template <typename T>
   2138 Operand CodeGeneratorX86Shared::toMemoryAccessOperand(T* lir, int32_t disp) {
   2139  const LAllocation* ptr = lir->ptr();
   2140  const LAllocation* memoryBase = lir->memoryBase();
   2141 #ifdef JS_CODEGEN_X86
   2142  Operand destAddr = ptr->isBogus() ? Operand(ToRegister(memoryBase), disp)
   2143                                    : Operand(ToRegister(memoryBase),
   2144                                              ToRegister(ptr), TimesOne, disp);
   2145 #else
   2146  auto baseReg = memoryBase->isBogus() ? HeapReg : ToRegister(memoryBase);
   2147  Operand destAddr = ptr->isBogus()
   2148                         ? Operand(baseReg, disp)
   2149                         : Operand(baseReg, ToRegister(ptr), TimesOne, disp);
   2150 #endif
   2151  return destAddr;
   2152 }
   2153 
   2154 void CodeGenerator::visitSimd128(LSimd128* ins) {
   2155 #ifdef ENABLE_WASM_SIMD
   2156  const LDefinition* out = ins->output();
   2157  masm.loadConstantSimd128(ins->simd128(), ToFloatRegister(out));
   2158 #else
   2159  MOZ_CRASH("No SIMD");
   2160 #endif
   2161 }
   2162 
// Emit a three-operand wasm SIMD operation (bitselect, fused
// multiply-add/sub variants, relaxed lane select, relaxed dot-add). Note the
// destination conventions differ per op: bitselect and the fma/fnma/dot ops
// write in place into v0, while the lane selects have a separate output.
void CodeGenerator::visitWasmTernarySimd128(LWasmTernarySimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  switch (ins->simdOp()) {
    case wasm::SimdOp::V128Bitselect: {
      // dest = (lhs & control) | (rhs & ~control); result lands in v0.
      FloatRegister lhsDest = ToFloatRegister(ins->v0());
      FloatRegister rhs = ToFloatRegister(ins->v1());
      FloatRegister control = ToFloatRegister(ins->v2());
      FloatRegister temp = ToFloatRegister(ins->temp0());
      masm.bitwiseSelectSimd128(control, lhsDest, rhs, lhsDest, temp);
      break;
    }
    case wasm::SimdOp::F32x4RelaxedMadd:
      masm.fmaFloat32x4(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()),
                        ToFloatRegister(ins->v2()));
      break;
    case wasm::SimdOp::F32x4RelaxedNmadd:
      masm.fnmaFloat32x4(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()),
                         ToFloatRegister(ins->v2()));
      break;
    case wasm::SimdOp::F64x2RelaxedMadd:
      masm.fmaFloat64x2(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()),
                        ToFloatRegister(ins->v2()));
      break;
    case wasm::SimdOp::F64x2RelaxedNmadd:
      masm.fnmaFloat64x2(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()),
                         ToFloatRegister(ins->v2()));
      break;
    case wasm::SimdOp::I8x16RelaxedLaneSelect:
    case wasm::SimdOp::I16x8RelaxedLaneSelect:
    case wasm::SimdOp::I32x4RelaxedLaneSelect:
    case wasm::SimdOp::I64x2RelaxedLaneSelect: {
      // All lane widths share one lowering; the mask picks lanes from lhs.
      FloatRegister lhs = ToFloatRegister(ins->v0());
      FloatRegister rhs = ToFloatRegister(ins->v1());
      FloatRegister mask = ToFloatRegister(ins->v2());
      FloatRegister dest = ToFloatRegister(ins->output());
      masm.laneSelectSimd128(mask, lhs, rhs, dest);
      break;
    }
    case wasm::SimdOp::I32x4RelaxedDotI8x16I7x16AddS:
      masm.dotInt8x16Int7x16ThenAdd(ToFloatRegister(ins->v0()),
                                    ToFloatRegister(ins->v1()),
                                    ToFloatRegister(ins->v2()));
      break;
    default:
      MOZ_CRASH("NYI");
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}
   2213 
   2214 void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) {
   2215 #ifdef ENABLE_WASM_SIMD
   2216  FloatRegister lhs = ToFloatRegister(ins->lhs());
   2217  FloatRegister rhs = ToFloatRegister(ins->rhs());
   2218  FloatRegister temp1 = ToTempFloatRegisterOrInvalid(ins->temp0());
   2219  FloatRegister temp2 = ToTempFloatRegisterOrInvalid(ins->temp1());
   2220  FloatRegister dest = ToFloatRegister(ins->output());
   2221 
   2222  switch (ins->simdOp()) {
   2223    case wasm::SimdOp::V128And:
   2224      masm.bitwiseAndSimd128(lhs, rhs, dest);
   2225      break;
   2226    case wasm::SimdOp::V128Or:
   2227      masm.bitwiseOrSimd128(lhs, rhs, dest);
   2228      break;
   2229    case wasm::SimdOp::V128Xor:
   2230      masm.bitwiseXorSimd128(lhs, rhs, dest);
   2231      break;
   2232    case wasm::SimdOp::V128AndNot:
   2233      // x86/x64 specific: The CPU provides ~A & B.  The operands were swapped
   2234      // during lowering, and we'll compute A & ~B here as desired.
   2235      masm.bitwiseNotAndSimd128(lhs, rhs, dest);
   2236      break;
   2237    case wasm::SimdOp::I8x16AvgrU:
   2238      masm.unsignedAverageInt8x16(lhs, rhs, dest);
   2239      break;
   2240    case wasm::SimdOp::I16x8AvgrU:
   2241      masm.unsignedAverageInt16x8(lhs, rhs, dest);
   2242      break;
   2243    case wasm::SimdOp::I8x16Add:
   2244      masm.addInt8x16(lhs, rhs, dest);
   2245      break;
   2246    case wasm::SimdOp::I8x16AddSatS:
   2247      masm.addSatInt8x16(lhs, rhs, dest);
   2248      break;
   2249    case wasm::SimdOp::I8x16AddSatU:
   2250      masm.unsignedAddSatInt8x16(lhs, rhs, dest);
   2251      break;
   2252    case wasm::SimdOp::I8x16Sub:
   2253      masm.subInt8x16(lhs, rhs, dest);
   2254      break;
   2255    case wasm::SimdOp::I8x16SubSatS:
   2256      masm.subSatInt8x16(lhs, rhs, dest);
   2257      break;
   2258    case wasm::SimdOp::I8x16SubSatU:
   2259      masm.unsignedSubSatInt8x16(lhs, rhs, dest);
   2260      break;
   2261    case wasm::SimdOp::I8x16MinS:
   2262      masm.minInt8x16(lhs, rhs, dest);
   2263      break;
   2264    case wasm::SimdOp::I8x16MinU:
   2265      masm.unsignedMinInt8x16(lhs, rhs, dest);
   2266      break;
   2267    case wasm::SimdOp::I8x16MaxS:
   2268      masm.maxInt8x16(lhs, rhs, dest);
   2269      break;
   2270    case wasm::SimdOp::I8x16MaxU:
   2271      masm.unsignedMaxInt8x16(lhs, rhs, dest);
   2272      break;
   2273    case wasm::SimdOp::I16x8Add:
   2274      masm.addInt16x8(lhs, rhs, dest);
   2275      break;
   2276    case wasm::SimdOp::I16x8AddSatS:
   2277      masm.addSatInt16x8(lhs, rhs, dest);
   2278      break;
   2279    case wasm::SimdOp::I16x8AddSatU:
   2280      masm.unsignedAddSatInt16x8(lhs, rhs, dest);
   2281      break;
   2282    case wasm::SimdOp::I16x8Sub:
   2283      masm.subInt16x8(lhs, rhs, dest);
   2284      break;
   2285    case wasm::SimdOp::I16x8SubSatS:
   2286      masm.subSatInt16x8(lhs, rhs, dest);
   2287      break;
   2288    case wasm::SimdOp::I16x8SubSatU:
   2289      masm.unsignedSubSatInt16x8(lhs, rhs, dest);
   2290      break;
   2291    case wasm::SimdOp::I16x8Mul:
   2292      masm.mulInt16x8(lhs, rhs, dest);
   2293      break;
   2294    case wasm::SimdOp::I16x8MinS:
   2295      masm.minInt16x8(lhs, rhs, dest);
   2296      break;
   2297    case wasm::SimdOp::I16x8MinU:
   2298      masm.unsignedMinInt16x8(lhs, rhs, dest);
   2299      break;
   2300    case wasm::SimdOp::I16x8MaxS:
   2301      masm.maxInt16x8(lhs, rhs, dest);
   2302      break;
   2303    case wasm::SimdOp::I16x8MaxU:
   2304      masm.unsignedMaxInt16x8(lhs, rhs, dest);
   2305      break;
   2306    case wasm::SimdOp::I32x4Add:
   2307      masm.addInt32x4(lhs, rhs, dest);
   2308      break;
   2309    case wasm::SimdOp::I32x4Sub:
   2310      masm.subInt32x4(lhs, rhs, dest);
   2311      break;
   2312    case wasm::SimdOp::I32x4Mul:
   2313      masm.mulInt32x4(lhs, rhs, dest);
   2314      break;
   2315    case wasm::SimdOp::I32x4MinS:
   2316      masm.minInt32x4(lhs, rhs, dest);
   2317      break;
   2318    case wasm::SimdOp::I32x4MinU:
   2319      masm.unsignedMinInt32x4(lhs, rhs, dest);
   2320      break;
   2321    case wasm::SimdOp::I32x4MaxS:
   2322      masm.maxInt32x4(lhs, rhs, dest);
   2323      break;
   2324    case wasm::SimdOp::I32x4MaxU:
   2325      masm.unsignedMaxInt32x4(lhs, rhs, dest);
   2326      break;
   2327    case wasm::SimdOp::I64x2Add:
   2328      masm.addInt64x2(lhs, rhs, dest);
   2329      break;
   2330    case wasm::SimdOp::I64x2Sub:
   2331      masm.subInt64x2(lhs, rhs, dest);
   2332      break;
   2333    case wasm::SimdOp::I64x2Mul:
   2334      masm.mulInt64x2(lhs, rhs, dest, temp1);
   2335      break;
   2336    case wasm::SimdOp::F32x4Add:
   2337      masm.addFloat32x4(lhs, rhs, dest);
   2338      break;
   2339    case wasm::SimdOp::F32x4Sub:
   2340      masm.subFloat32x4(lhs, rhs, dest);
   2341      break;
   2342    case wasm::SimdOp::F32x4Mul:
   2343      masm.mulFloat32x4(lhs, rhs, dest);
   2344      break;
   2345    case wasm::SimdOp::F32x4Div:
   2346      masm.divFloat32x4(lhs, rhs, dest);
   2347      break;
   2348    case wasm::SimdOp::F32x4Min:
   2349      masm.minFloat32x4(lhs, rhs, dest, temp1, temp2);
   2350      break;
   2351    case wasm::SimdOp::F32x4Max:
   2352      masm.maxFloat32x4(lhs, rhs, dest, temp1, temp2);
   2353      break;
   2354    case wasm::SimdOp::F64x2Add:
   2355      masm.addFloat64x2(lhs, rhs, dest);
   2356      break;
   2357    case wasm::SimdOp::F64x2Sub:
   2358      masm.subFloat64x2(lhs, rhs, dest);
   2359      break;
   2360    case wasm::SimdOp::F64x2Mul:
   2361      masm.mulFloat64x2(lhs, rhs, dest);
   2362      break;
   2363    case wasm::SimdOp::F64x2Div:
   2364      masm.divFloat64x2(lhs, rhs, dest);
   2365      break;
   2366    case wasm::SimdOp::F64x2Min:
   2367      masm.minFloat64x2(lhs, rhs, dest, temp1, temp2);
   2368      break;
   2369    case wasm::SimdOp::F64x2Max:
   2370      masm.maxFloat64x2(lhs, rhs, dest, temp1, temp2);
   2371      break;
   2372    case wasm::SimdOp::I8x16Swizzle:
   2373      masm.swizzleInt8x16(lhs, rhs, dest);
   2374      break;
   2375    case wasm::SimdOp::I8x16RelaxedSwizzle:
   2376      masm.swizzleInt8x16Relaxed(lhs, rhs, dest);
   2377      break;
   2378    case wasm::SimdOp::I8x16NarrowI16x8S:
   2379      masm.narrowInt16x8(lhs, rhs, dest);
   2380      break;
   2381    case wasm::SimdOp::I8x16NarrowI16x8U:
   2382      masm.unsignedNarrowInt16x8(lhs, rhs, dest);
   2383      break;
   2384    case wasm::SimdOp::I16x8NarrowI32x4S:
   2385      masm.narrowInt32x4(lhs, rhs, dest);
   2386      break;
   2387    case wasm::SimdOp::I16x8NarrowI32x4U:
   2388      masm.unsignedNarrowInt32x4(lhs, rhs, dest);
   2389      break;
   2390    case wasm::SimdOp::I8x16Eq:
   2391      masm.compareInt8x16(Assembler::Equal, lhs, rhs, dest);
   2392      break;
   2393    case wasm::SimdOp::I8x16Ne:
   2394      masm.compareInt8x16(Assembler::NotEqual, lhs, rhs, dest);
   2395      break;
   2396    case wasm::SimdOp::I8x16LtS:
   2397      masm.compareInt8x16(Assembler::LessThan, lhs, rhs, dest);
   2398      break;
   2399    case wasm::SimdOp::I8x16GtS:
   2400      masm.compareInt8x16(Assembler::GreaterThan, lhs, rhs, dest);
   2401      break;
   2402    case wasm::SimdOp::I8x16LeS:
   2403      masm.compareInt8x16(Assembler::LessThanOrEqual, lhs, rhs, dest);
   2404      break;
   2405    case wasm::SimdOp::I8x16GeS:
   2406      masm.compareInt8x16(Assembler::GreaterThanOrEqual, lhs, rhs, dest);
   2407      break;
   2408    case wasm::SimdOp::I8x16LtU:
   2409      masm.compareInt8x16(Assembler::Below, lhs, rhs, dest);
   2410      break;
   2411    case wasm::SimdOp::I8x16GtU:
   2412      masm.compareInt8x16(Assembler::Above, lhs, rhs, dest);
   2413      break;
   2414    case wasm::SimdOp::I8x16LeU:
   2415      masm.compareInt8x16(Assembler::BelowOrEqual, lhs, rhs, dest);
   2416      break;
   2417    case wasm::SimdOp::I8x16GeU:
   2418      masm.compareInt8x16(Assembler::AboveOrEqual, lhs, rhs, dest);
   2419      break;
   2420    case wasm::SimdOp::I16x8Eq:
   2421      masm.compareInt16x8(Assembler::Equal, lhs, rhs, dest);
   2422      break;
   2423    case wasm::SimdOp::I16x8Ne:
   2424      masm.compareInt16x8(Assembler::NotEqual, lhs, rhs, dest);
   2425      break;
   2426    case wasm::SimdOp::I16x8LtS:
   2427      masm.compareInt16x8(Assembler::LessThan, lhs, rhs, dest);
   2428      break;
   2429    case wasm::SimdOp::I16x8GtS:
   2430      masm.compareInt16x8(Assembler::GreaterThan, lhs, rhs, dest);
   2431      break;
   2432    case wasm::SimdOp::I16x8LeS:
   2433      masm.compareInt16x8(Assembler::LessThanOrEqual, lhs, rhs, dest);
   2434      break;
   2435    case wasm::SimdOp::I16x8GeS:
   2436      masm.compareInt16x8(Assembler::GreaterThanOrEqual, lhs, rhs, dest);
   2437      break;
   2438    case wasm::SimdOp::I16x8LtU:
   2439      masm.compareInt16x8(Assembler::Below, lhs, rhs, dest);
   2440      break;
   2441    case wasm::SimdOp::I16x8GtU:
   2442      masm.compareInt16x8(Assembler::Above, lhs, rhs, dest);
   2443      break;
   2444    case wasm::SimdOp::I16x8LeU:
   2445      masm.compareInt16x8(Assembler::BelowOrEqual, lhs, rhs, dest);
   2446      break;
   2447    case wasm::SimdOp::I16x8GeU:
   2448      masm.compareInt16x8(Assembler::AboveOrEqual, lhs, rhs, dest);
   2449      break;
   2450    case wasm::SimdOp::I32x4Eq:
   2451      masm.compareInt32x4(Assembler::Equal, lhs, rhs, dest);
   2452      break;
   2453    case wasm::SimdOp::I32x4Ne:
   2454      masm.compareInt32x4(Assembler::NotEqual, lhs, rhs, dest);
   2455      break;
   2456    case wasm::SimdOp::I32x4LtS:
   2457      masm.compareInt32x4(Assembler::LessThan, lhs, rhs, dest);
   2458      break;
   2459    case wasm::SimdOp::I32x4GtS:
   2460      masm.compareInt32x4(Assembler::GreaterThan, lhs, rhs, dest);
   2461      break;
   2462    case wasm::SimdOp::I32x4LeS:
   2463      masm.compareInt32x4(Assembler::LessThanOrEqual, lhs, rhs, dest);
   2464      break;
   2465    case wasm::SimdOp::I32x4GeS:
   2466      masm.compareInt32x4(Assembler::GreaterThanOrEqual, lhs, rhs, dest);
   2467      break;
   2468    case wasm::SimdOp::I32x4LtU:
   2469      masm.compareInt32x4(Assembler::Below, lhs, rhs, dest);
   2470      break;
   2471    case wasm::SimdOp::I32x4GtU:
   2472      masm.compareInt32x4(Assembler::Above, lhs, rhs, dest);
   2473      break;
   2474    case wasm::SimdOp::I32x4LeU:
   2475      masm.compareInt32x4(Assembler::BelowOrEqual, lhs, rhs, dest);
   2476      break;
   2477    case wasm::SimdOp::I32x4GeU:
   2478      masm.compareInt32x4(Assembler::AboveOrEqual, lhs, rhs, dest);
   2479      break;
   2480    case wasm::SimdOp::I64x2Eq:
   2481      masm.compareForEqualityInt64x2(Assembler::Equal, lhs, rhs, dest);
   2482      break;
   2483    case wasm::SimdOp::I64x2Ne:
   2484      masm.compareForEqualityInt64x2(Assembler::NotEqual, lhs, rhs, dest);
   2485      break;
   2486    case wasm::SimdOp::I64x2LtS:
   2487      masm.compareForOrderingInt64x2(Assembler::LessThan, lhs, rhs, dest, temp1,
   2488                                     temp2);
   2489      break;
   2490    case wasm::SimdOp::I64x2GtS:
   2491      masm.compareForOrderingInt64x2(Assembler::GreaterThan, lhs, rhs, dest,
   2492                                     temp1, temp2);
   2493      break;
   2494    case wasm::SimdOp::I64x2LeS:
   2495      masm.compareForOrderingInt64x2(Assembler::LessThanOrEqual, lhs, rhs, dest,
   2496                                     temp1, temp2);
   2497      break;
   2498    case wasm::SimdOp::I64x2GeS:
   2499      masm.compareForOrderingInt64x2(Assembler::GreaterThanOrEqual, lhs, rhs,
   2500                                     dest, temp1, temp2);
   2501      break;
   2502    case wasm::SimdOp::F32x4Eq:
   2503      masm.compareFloat32x4(Assembler::Equal, lhs, rhs, dest);
   2504      break;
   2505    case wasm::SimdOp::F32x4Ne:
   2506      masm.compareFloat32x4(Assembler::NotEqual, lhs, rhs, dest);
   2507      break;
   2508    case wasm::SimdOp::F32x4Lt:
   2509      masm.compareFloat32x4(Assembler::LessThan, lhs, rhs, dest);
   2510      break;
   2511    case wasm::SimdOp::F32x4Le:
   2512      masm.compareFloat32x4(Assembler::LessThanOrEqual, lhs, rhs, dest);
   2513      break;
   2514    case wasm::SimdOp::F64x2Eq:
   2515      masm.compareFloat64x2(Assembler::Equal, lhs, rhs, dest);
   2516      break;
   2517    case wasm::SimdOp::F64x2Ne:
   2518      masm.compareFloat64x2(Assembler::NotEqual, lhs, rhs, dest);
   2519      break;
   2520    case wasm::SimdOp::F64x2Lt:
   2521      masm.compareFloat64x2(Assembler::LessThan, lhs, rhs, dest);
   2522      break;
   2523    case wasm::SimdOp::F64x2Le:
   2524      masm.compareFloat64x2(Assembler::LessThanOrEqual, lhs, rhs, dest);
   2525      break;
   2526    case wasm::SimdOp::F32x4PMax:
   2527      // `lhs` and `rhs` are swapped, for non-VEX platforms the output is rhs.
   2528      masm.pseudoMaxFloat32x4(lhs, rhs, dest);
   2529      break;
   2530    case wasm::SimdOp::F32x4PMin:
   2531      // `lhs` and `rhs` are swapped, for non-VEX platforms the output is rhs.
   2532      masm.pseudoMinFloat32x4(lhs, rhs, dest);
   2533      break;
   2534    case wasm::SimdOp::F64x2PMax:
   2535      // `lhs` and `rhs` are swapped, for non-VEX platforms the output is rhs.
   2536      masm.pseudoMaxFloat64x2(lhs, rhs, dest);
   2537      break;
   2538    case wasm::SimdOp::F64x2PMin:
   2539      // `lhs` and `rhs` are swapped, for non-VEX platforms the output is rhs.
   2540      masm.pseudoMinFloat64x2(lhs, rhs, dest);
   2541      break;
   2542    case wasm::SimdOp::I32x4DotI16x8S:
   2543      masm.widenDotInt16x8(lhs, rhs, dest);
   2544      break;
   2545    case wasm::SimdOp::I16x8ExtmulLowI8x16S:
   2546      masm.extMulLowInt8x16(lhs, rhs, dest);
   2547      break;
   2548    case wasm::SimdOp::I16x8ExtmulHighI8x16S:
   2549      masm.extMulHighInt8x16(lhs, rhs, dest);
   2550      break;
   2551    case wasm::SimdOp::I16x8ExtmulLowI8x16U:
   2552      masm.unsignedExtMulLowInt8x16(lhs, rhs, dest);
   2553      break;
   2554    case wasm::SimdOp::I16x8ExtmulHighI8x16U:
   2555      masm.unsignedExtMulHighInt8x16(lhs, rhs, dest);
   2556      break;
   2557    case wasm::SimdOp::I32x4ExtmulLowI16x8S:
   2558      masm.extMulLowInt16x8(lhs, rhs, dest);
   2559      break;
   2560    case wasm::SimdOp::I32x4ExtmulHighI16x8S:
   2561      masm.extMulHighInt16x8(lhs, rhs, dest);
   2562      break;
   2563    case wasm::SimdOp::I32x4ExtmulLowI16x8U:
   2564      masm.unsignedExtMulLowInt16x8(lhs, rhs, dest);
   2565      break;
   2566    case wasm::SimdOp::I32x4ExtmulHighI16x8U:
   2567      masm.unsignedExtMulHighInt16x8(lhs, rhs, dest);
   2568      break;
   2569    case wasm::SimdOp::I64x2ExtmulLowI32x4S:
   2570      masm.extMulLowInt32x4(lhs, rhs, dest);
   2571      break;
   2572    case wasm::SimdOp::I64x2ExtmulHighI32x4S:
   2573      masm.extMulHighInt32x4(lhs, rhs, dest);
   2574      break;
   2575    case wasm::SimdOp::I64x2ExtmulLowI32x4U:
   2576      masm.unsignedExtMulLowInt32x4(lhs, rhs, dest);
   2577      break;
   2578    case wasm::SimdOp::I64x2ExtmulHighI32x4U:
   2579      masm.unsignedExtMulHighInt32x4(lhs, rhs, dest);
   2580      break;
   2581    case wasm::SimdOp::I16x8Q15MulrSatS:
   2582      masm.q15MulrSatInt16x8(lhs, rhs, dest);
   2583      break;
   2584    case wasm::SimdOp::F32x4RelaxedMin:
   2585      masm.minFloat32x4Relaxed(lhs, rhs, dest);
   2586      break;
   2587    case wasm::SimdOp::F32x4RelaxedMax:
   2588      masm.maxFloat32x4Relaxed(lhs, rhs, dest);
   2589      break;
   2590    case wasm::SimdOp::F64x2RelaxedMin:
   2591      masm.minFloat64x2Relaxed(lhs, rhs, dest);
   2592      break;
   2593    case wasm::SimdOp::F64x2RelaxedMax:
   2594      masm.maxFloat64x2Relaxed(lhs, rhs, dest);
   2595      break;
   2596    case wasm::SimdOp::I16x8RelaxedQ15MulrS:
   2597      masm.q15MulrInt16x8Relaxed(lhs, rhs, dest);
   2598      break;
   2599    case wasm::SimdOp::I16x8RelaxedDotI8x16I7x16S:
   2600      masm.dotInt8x16Int7x16(lhs, rhs, dest);
   2601      break;
   2602    case wasm::SimdOp::MozPMADDUBSW:
   2603      masm.vpmaddubsw(rhs, lhs, dest);
   2604      break;
   2605    default:
   2606      MOZ_CRASH("Binary SimdOp not implemented");
   2607  }
   2608 #else
   2609  MOZ_CRASH("No SIMD");
   2610 #endif
   2611 }
   2612 
void CodeGenerator::visitWasmBinarySimd128WithConstant(
    LWasmBinarySimd128WithConstant* ins) {
#ifdef ENABLE_WASM_SIMD
  // Codegen for a binary SIMD operation whose right-hand operand is a
  // compile-time SimdConstant rather than a register; each masm helper
  // below takes the constant operand directly.
  FloatRegister lhs = ToFloatRegister(ins->lhs());
  const SimdConstant& rhs = ins->rhs();
  FloatRegister dest = ToFloatRegister(ins->output());
  // Of the ops handled here, only I64x2Mul passes the temp to masm; for all
  // other ops it is expected to be invalid.
  FloatRegister temp = ToTempFloatRegisterOrInvalid(ins->temp0());

  switch (ins->mir()->simdOp()) {
    // Lane-wise integer add/sub/mul.
    case wasm::SimdOp::I8x16Add:
      masm.addInt8x16(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I16x8Add:
      masm.addInt16x8(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I32x4Add:
      masm.addInt32x4(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I64x2Add:
      masm.addInt64x2(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I8x16Sub:
      masm.subInt8x16(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I16x8Sub:
      masm.subInt16x8(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I32x4Sub:
      masm.subInt32x4(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I64x2Sub:
      masm.subInt64x2(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I16x8Mul:
      masm.mulInt16x8(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I32x4Mul:
      masm.mulInt32x4(lhs, rhs, dest);
      break;
    // Saturating integer add/sub, signed (S) and unsigned (U) flavors.
    case wasm::SimdOp::I8x16AddSatS:
      masm.addSatInt8x16(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I8x16AddSatU:
      masm.unsignedAddSatInt8x16(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I16x8AddSatS:
      masm.addSatInt16x8(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I16x8AddSatU:
      masm.unsignedAddSatInt16x8(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I8x16SubSatS:
      masm.subSatInt8x16(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I8x16SubSatU:
      masm.unsignedSubSatInt8x16(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I16x8SubSatS:
      masm.subSatInt16x8(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I16x8SubSatU:
      masm.unsignedSubSatInt16x8(lhs, rhs, dest);
      break;
    // Lane-wise integer min/max.
    case wasm::SimdOp::I8x16MinS:
      masm.minInt8x16(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I8x16MinU:
      masm.unsignedMinInt8x16(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I16x8MinS:
      masm.minInt16x8(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I16x8MinU:
      masm.unsignedMinInt16x8(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I32x4MinS:
      masm.minInt32x4(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I32x4MinU:
      masm.unsignedMinInt32x4(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I8x16MaxS:
      masm.maxInt8x16(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I8x16MaxU:
      masm.unsignedMaxInt8x16(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I16x8MaxS:
      masm.maxInt16x8(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I16x8MaxU:
      masm.unsignedMaxInt16x8(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I32x4MaxS:
      masm.maxInt32x4(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I32x4MaxU:
      masm.unsignedMaxInt32x4(lhs, rhs, dest);
      break;
    // Full-width bitwise logic.
    case wasm::SimdOp::V128And:
      masm.bitwiseAndSimd128(lhs, rhs, dest);
      break;
    case wasm::SimdOp::V128Or:
      masm.bitwiseOrSimd128(lhs, rhs, dest);
      break;
    case wasm::SimdOp::V128Xor:
      masm.bitwiseXorSimd128(lhs, rhs, dest);
      break;
    // Integer comparisons.  Note only a subset of conditions is handled in
    // the with-constant path; other conditions take the register path.
    case wasm::SimdOp::I8x16Eq:
      masm.compareInt8x16(Assembler::Equal, lhs, rhs, dest);
      break;
    case wasm::SimdOp::I8x16Ne:
      masm.compareInt8x16(Assembler::NotEqual, lhs, rhs, dest);
      break;
    case wasm::SimdOp::I8x16GtS:
      masm.compareInt8x16(Assembler::GreaterThan, lhs, rhs, dest);
      break;
    case wasm::SimdOp::I8x16LeS:
      masm.compareInt8x16(Assembler::LessThanOrEqual, lhs, rhs, dest);
      break;
    case wasm::SimdOp::I16x8Eq:
      masm.compareInt16x8(Assembler::Equal, lhs, rhs, dest);
      break;
    case wasm::SimdOp::I16x8Ne:
      masm.compareInt16x8(Assembler::NotEqual, lhs, rhs, dest);
      break;
    case wasm::SimdOp::I16x8GtS:
      masm.compareInt16x8(Assembler::GreaterThan, lhs, rhs, dest);
      break;
    case wasm::SimdOp::I16x8LeS:
      masm.compareInt16x8(Assembler::LessThanOrEqual, lhs, rhs, dest);
      break;
    case wasm::SimdOp::I32x4Eq:
      masm.compareInt32x4(Assembler::Equal, lhs, rhs, dest);
      break;
    case wasm::SimdOp::I32x4Ne:
      masm.compareInt32x4(Assembler::NotEqual, lhs, rhs, dest);
      break;
    case wasm::SimdOp::I32x4GtS:
      masm.compareInt32x4(Assembler::GreaterThan, lhs, rhs, dest);
      break;
    case wasm::SimdOp::I32x4LeS:
      masm.compareInt32x4(Assembler::LessThanOrEqual, lhs, rhs, dest);
      break;
    // 64x2 multiply is the only case that consumes the temp register.
    case wasm::SimdOp::I64x2Mul:
      masm.mulInt64x2(lhs, rhs, dest, temp);
      break;
    // Floating-point comparisons.
    case wasm::SimdOp::F32x4Eq:
      masm.compareFloat32x4(Assembler::Equal, lhs, rhs, dest);
      break;
    case wasm::SimdOp::F32x4Ne:
      masm.compareFloat32x4(Assembler::NotEqual, lhs, rhs, dest);
      break;
    case wasm::SimdOp::F32x4Lt:
      masm.compareFloat32x4(Assembler::LessThan, lhs, rhs, dest);
      break;
    case wasm::SimdOp::F32x4Le:
      masm.compareFloat32x4(Assembler::LessThanOrEqual, lhs, rhs, dest);
      break;
    case wasm::SimdOp::F64x2Eq:
      masm.compareFloat64x2(Assembler::Equal, lhs, rhs, dest);
      break;
    case wasm::SimdOp::F64x2Ne:
      masm.compareFloat64x2(Assembler::NotEqual, lhs, rhs, dest);
      break;
    case wasm::SimdOp::F64x2Lt:
      masm.compareFloat64x2(Assembler::LessThan, lhs, rhs, dest);
      break;
    case wasm::SimdOp::F64x2Le:
      masm.compareFloat64x2(Assembler::LessThanOrEqual, lhs, rhs, dest);
      break;
    case wasm::SimdOp::I32x4DotI16x8S:
      masm.widenDotInt16x8(lhs, rhs, dest);
      break;
    // Floating-point arithmetic.
    case wasm::SimdOp::F32x4Add:
      masm.addFloat32x4(lhs, rhs, dest);
      break;
    case wasm::SimdOp::F64x2Add:
      masm.addFloat64x2(lhs, rhs, dest);
      break;
    case wasm::SimdOp::F32x4Sub:
      masm.subFloat32x4(lhs, rhs, dest);
      break;
    case wasm::SimdOp::F64x2Sub:
      masm.subFloat64x2(lhs, rhs, dest);
      break;
    case wasm::SimdOp::F32x4Div:
      masm.divFloat32x4(lhs, rhs, dest);
      break;
    case wasm::SimdOp::F64x2Div:
      masm.divFloat64x2(lhs, rhs, dest);
      break;
    case wasm::SimdOp::F32x4Mul:
      masm.mulFloat32x4(lhs, rhs, dest);
      break;
    case wasm::SimdOp::F64x2Mul:
      masm.mulFloat64x2(lhs, rhs, dest);
      break;
    // Saturating lane-narrowing conversions.
    case wasm::SimdOp::I8x16NarrowI16x8S:
      masm.narrowInt16x8(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I8x16NarrowI16x8U:
      masm.unsignedNarrowInt16x8(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I16x8NarrowI32x4S:
      masm.narrowInt32x4(lhs, rhs, dest);
      break;
    case wasm::SimdOp::I16x8NarrowI32x4U:
      masm.unsignedNarrowInt32x4(lhs, rhs, dest);
      break;
    default:
      MOZ_CRASH("Binary SimdOp with constant not implemented");
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}
   2830 
void CodeGenerator::visitWasmVariableShiftSimd128(
    LWasmVariableShiftSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  // Shift every lane of a v128 by a run-time scalar amount held in a GPR.
  // The operation is in-place: the output register must alias the lhs.
  FloatRegister lhsDest = ToFloatRegister(ins->lhs());
  Register rhs = ToRegister(ins->rhs());
  // Only the 8x16 shifts and I64x2ShrS pass the temp to masm (there are no
  // x86 byte-granularity or 64-bit arithmetic shift instructions, so those
  // helpers need scratch space); the remaining cases ignore it.
  FloatRegister temp = ToTempFloatRegisterOrInvalid(ins->temp0());

  MOZ_ASSERT(ToFloatRegister(ins->output()) == lhsDest);

  switch (ins->mir()->simdOp()) {
    case wasm::SimdOp::I8x16Shl:
      masm.leftShiftInt8x16(rhs, lhsDest, temp);
      break;
    case wasm::SimdOp::I8x16ShrS:
      masm.rightShiftInt8x16(rhs, lhsDest, temp);
      break;
    case wasm::SimdOp::I8x16ShrU:
      masm.unsignedRightShiftInt8x16(rhs, lhsDest, temp);
      break;
    case wasm::SimdOp::I16x8Shl:
      masm.leftShiftInt16x8(rhs, lhsDest);
      break;
    case wasm::SimdOp::I16x8ShrS:
      masm.rightShiftInt16x8(rhs, lhsDest);
      break;
    case wasm::SimdOp::I16x8ShrU:
      masm.unsignedRightShiftInt16x8(rhs, lhsDest);
      break;
    case wasm::SimdOp::I32x4Shl:
      masm.leftShiftInt32x4(rhs, lhsDest);
      break;
    case wasm::SimdOp::I32x4ShrS:
      masm.rightShiftInt32x4(rhs, lhsDest);
      break;
    case wasm::SimdOp::I32x4ShrU:
      masm.unsignedRightShiftInt32x4(rhs, lhsDest);
      break;
    case wasm::SimdOp::I64x2Shl:
      masm.leftShiftInt64x2(rhs, lhsDest);
      break;
    case wasm::SimdOp::I64x2ShrS:
      masm.rightShiftInt64x2(rhs, lhsDest, temp);
      break;
    case wasm::SimdOp::I64x2ShrU:
      masm.unsignedRightShiftInt64x2(rhs, lhsDest);
      break;
    default:
      MOZ_CRASH("Shift SimdOp not implemented");
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}
   2884 
void CodeGenerator::visitWasmConstantShiftSimd128(
    LWasmConstantShiftSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  // Shift every lane of a v128 by a compile-time constant amount.
  FloatRegister src = ToFloatRegister(ins->src());
  FloatRegister dest = ToFloatRegister(ins->output());
  int32_t shift = ins->shift();

  // A zero-bit shift is just a register-to-register move.
  if (shift == 0) {
    masm.moveSimd128(src, dest);
    return;
  }

  switch (ins->mir()->simdOp()) {
    case wasm::SimdOp::I8x16Shl:
      masm.leftShiftInt8x16(Imm32(shift), src, dest);
      break;
    case wasm::SimdOp::I8x16ShrS:
      masm.rightShiftInt8x16(Imm32(shift), src, dest);
      break;
    case wasm::SimdOp::I8x16ShrU:
      masm.unsignedRightShiftInt8x16(Imm32(shift), src, dest);
      break;
    case wasm::SimdOp::I16x8Shl:
      masm.leftShiftInt16x8(Imm32(shift), src, dest);
      break;
    case wasm::SimdOp::I16x8ShrS:
      masm.rightShiftInt16x8(Imm32(shift), src, dest);
      break;
    case wasm::SimdOp::I16x8ShrU:
      masm.unsignedRightShiftInt16x8(Imm32(shift), src, dest);
      break;
    case wasm::SimdOp::I32x4Shl:
      masm.leftShiftInt32x4(Imm32(shift), src, dest);
      break;
    case wasm::SimdOp::I32x4ShrS:
      masm.rightShiftInt32x4(Imm32(shift), src, dest);
      break;
    case wasm::SimdOp::I32x4ShrU:
      masm.unsignedRightShiftInt32x4(Imm32(shift), src, dest);
      break;
    case wasm::SimdOp::I64x2Shl:
      masm.leftShiftInt64x2(Imm32(shift), src, dest);
      break;
    case wasm::SimdOp::I64x2ShrS:
      masm.rightShiftInt64x2(Imm32(shift), src, dest);
      break;
    case wasm::SimdOp::I64x2ShrU:
      masm.unsignedRightShiftInt64x2(Imm32(shift), src, dest);
      break;
    default:
      MOZ_CRASH("Shift SimdOp not implemented");
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}
   2941 
void CodeGenerator::visitWasmSignReplicationSimd128(
    LWasmSignReplicationSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  // Strength-reduced form of an arithmetic right shift (ShrS): fill each
  // lane with copies of its sign bit.  NOTE(review): presumably lowering
  // selects this only when the shift amount is lane-width - 1 -- confirm
  // against the lowering code.
  FloatRegister src = ToFloatRegister(ins->src());
  FloatRegister dest = ToFloatRegister(ins->output());

  switch (ins->mir()->simdOp()) {
    case wasm::SimdOp::I8x16ShrS:
      masm.signReplicationInt8x16(src, dest);
      break;
    case wasm::SimdOp::I16x8ShrS:
      masm.signReplicationInt16x8(src, dest);
      break;
    case wasm::SimdOp::I32x4ShrS:
      masm.signReplicationInt32x4(src, dest);
      break;
    case wasm::SimdOp::I64x2ShrS:
      masm.signReplicationInt64x2(src, dest);
      break;
    default:
      MOZ_CRASH("Shift SimdOp unsupported sign replication optimization");
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}
   2968 
void CodeGenerator::visitWasmShuffleSimd128(LWasmShuffleSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  // Emit one of the specialized two-operand shuffle forms that the shuffle
  // analysis classified this instruction as; `control` carries the residual
  // mask data the chosen form still needs.
  FloatRegister lhsDest = ToFloatRegister(ins->lhs());
  FloatRegister rhs = ToFloatRegister(ins->rhs());
  SimdConstant control = ins->control();
  FloatRegister output = ToFloatRegister(ins->output());
  switch (ins->op()) {
    case SimdShuffleOp::BLEND_8x16: {
      // Byte-granularity blend; the only case here that needs a temp.
      masm.blendInt8x16(reinterpret_cast<const uint8_t*>(control.asInt8x16()),
                        lhsDest, rhs, output, ToFloatRegister(ins->temp0()));
      break;
    }
    case SimdShuffleOp::BLEND_16x8: {
      MOZ_ASSERT(ins->temp0()->isBogusTemp());
      masm.blendInt16x8(reinterpret_cast<const uint16_t*>(control.asInt16x8()),
                        lhsDest, rhs, output);
      break;
    }
    case SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16: {
      MOZ_ASSERT(ins->temp0()->isBogusTemp());
      // The control byte encodes 16 - count; count == 0 would mean the whole
      // result is lhs, which the analysis reduces to a MOVE instead.
      int8_t count = 16 - control.asInt8x16()[0];
      MOZ_ASSERT(count > 0, "Should have been a MOVE operation");
      masm.concatAndRightShiftSimd128(lhsDest, rhs, output, count);
      break;
    }
    // Interleave (unpack) the high halves of the two operands, at each of
    // the four lane granularities.
    case SimdShuffleOp::INTERLEAVE_HIGH_8x16: {
      MOZ_ASSERT(ins->temp0()->isBogusTemp());
      masm.interleaveHighInt8x16(lhsDest, rhs, output);
      break;
    }
    case SimdShuffleOp::INTERLEAVE_HIGH_16x8: {
      MOZ_ASSERT(ins->temp0()->isBogusTemp());
      masm.interleaveHighInt16x8(lhsDest, rhs, output);
      break;
    }
    case SimdShuffleOp::INTERLEAVE_HIGH_32x4: {
      MOZ_ASSERT(ins->temp0()->isBogusTemp());
      masm.interleaveHighInt32x4(lhsDest, rhs, output);
      break;
    }
    case SimdShuffleOp::INTERLEAVE_HIGH_64x2: {
      MOZ_ASSERT(ins->temp0()->isBogusTemp());
      masm.interleaveHighInt64x2(lhsDest, rhs, output);
      break;
    }
    // Interleave (unpack) the low halves of the two operands.
    case SimdShuffleOp::INTERLEAVE_LOW_8x16: {
      MOZ_ASSERT(ins->temp0()->isBogusTemp());
      masm.interleaveLowInt8x16(lhsDest, rhs, output);
      break;
    }
    case SimdShuffleOp::INTERLEAVE_LOW_16x8: {
      MOZ_ASSERT(ins->temp0()->isBogusTemp());
      masm.interleaveLowInt16x8(lhsDest, rhs, output);
      break;
    }
    case SimdShuffleOp::INTERLEAVE_LOW_32x4: {
      MOZ_ASSERT(ins->temp0()->isBogusTemp());
      masm.interleaveLowInt32x4(lhsDest, rhs, output);
      break;
    }
    case SimdShuffleOp::INTERLEAVE_LOW_64x2: {
      MOZ_ASSERT(ins->temp0()->isBogusTemp());
      masm.interleaveLowInt64x2(lhsDest, rhs, output);
      break;
    }
    case SimdShuffleOp::SHUFFLE_BLEND_8x16: {
      // General byte shuffle of both operands followed by a blend.
      masm.shuffleInt8x16(reinterpret_cast<const uint8_t*>(control.asInt8x16()),
                          lhsDest, rhs, output);
      break;
    }
    default: {
      MOZ_CRASH("Unsupported SIMD shuffle operation");
    }
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}
   3047 
   3048 #ifdef ENABLE_WASM_SIMD
   3049 
// Bit set describing how to realize a 16x8 permutation on x64: any
// combination of a qword swap and independent low/high-qword word permutes.
// Returned by CalculateX64Permute16x8 below.
enum PermuteX64I16x8Action : uint16_t {
  UNAVAILABLE = 0,
  SWAP_QWORDS = 1,  // Swap qwords first
  PERM_LOW = 2,     // Permute low qword by control_[0..3]
  PERM_HIGH = 4     // Permute high qword by control_[4..7]
};
   3056 
   3057 // Skip lanes that equal v starting at i, returning the index just beyond the
   3058 // last of those.  There is no requirement that the initial lanes[i] == v.
   3059 template <typename T>
   3060 static int ScanConstant(const T* lanes, int v, int i) {
   3061  int len = int(16 / sizeof(T));
   3062  MOZ_ASSERT(i <= len);
   3063  while (i < len && lanes[i] == v) {
   3064    i++;
   3065  }
   3066  return i;
   3067 }
   3068 
   3069 // Apply a transformation to each lane value.
   3070 template <typename T>
   3071 static void MapLanes(T* result, const T* input, int (*f)(int)) {
   3072  // Hazard analysis trips on "IndirectCall: f" error.
   3073  // Suppress the check -- `f` is expected to be trivial here.
   3074  JS::AutoSuppressGCAnalysis nogc;
   3075 
   3076  int len = int(16 / sizeof(T));
   3077  for (int i = 0; i < len; i++) {
   3078    result[i] = f(input[i]);
   3079  }
   3080 }
   3081 
   3082 // Recognize part of an identity permutation starting at start, with
   3083 // the first value of the permutation expected to be bias.
   3084 template <typename T>
   3085 static bool IsIdentity(const T* lanes, int start, int len, int bias) {
   3086  if (lanes[start] != bias) {
   3087    return false;
   3088  }
   3089  for (int i = start + 1; i < start + len; i++) {
   3090    if (lanes[i] != lanes[i - 1] + 1) {
   3091      return false;
   3092    }
   3093  }
   3094  return true;
   3095 }
   3096 
// We can permute by words if the mask is reducible to a word mask, but the x64
// lowering is only efficient if we can permute the high and low quadwords
// separately, possibly after swapping quadwords.
static PermuteX64I16x8Action CalculateX64Permute16x8(SimdConstant* control) {
  const SimdConstant::I16x8& lanes = control->asInt16x8();
  SimdConstant::I16x8 mapped;
  // Classify each lane by its source quadword: 0 for lanes 0..3 (low qword),
  // 1 for lanes 4..7 (high qword).
  MapLanes(mapped, lanes, [](int x) -> int { return x < 4 ? 0 : 1; });
  // The first four output lanes must all draw from a single qword...
  int i = ScanConstant(mapped, mapped[0], 0);
  if (i != 4) {
    return PermuteX64I16x8Action::UNAVAILABLE;
  }
  // ...and so must the last four, otherwise qword-wise lowering can't work.
  i = ScanConstant(mapped, mapped[4], 4);
  if (i != 8) {
    return PermuteX64I16x8Action::UNAVAILABLE;
  }
  // Now compute the operation bits.  `mapped` holds the adjusted lane mask.
  memcpy(mapped, lanes, sizeof(mapped));
  uint16_t op = 0;
  // If the low output half draws from higher lanes than the high half, the
  // qwords must be swapped before permuting.
  if (mapped[0] > mapped[4]) {
    op |= PermuteX64I16x8Action::SWAP_QWORDS;
  }
  // Reduce each lane index to its position within its qword (0..3).
  for (auto& m : mapped) {
    m &= 3;
  }
  // A half that is already the identity needs no permute of its own.
  if (!IsIdentity(mapped, 0, 4, 0)) {
    op |= PermuteX64I16x8Action::PERM_LOW;
  }
  if (!IsIdentity(mapped, 4, 4, 0)) {
    op |= PermuteX64I16x8Action::PERM_HIGH;
  }
  // At least one action applies here; a pure identity mask should have been
  // reduced to a MOVE before reaching this analysis.
  MOZ_ASSERT(op != PermuteX64I16x8Action::UNAVAILABLE);
  // Write the reduced (within-qword) mask back for the emitter to use.
  *control = SimdConstant::CreateX8(mapped);
  return (PermuteX64I16x8Action)op;
}
   3131 
   3132 #endif
   3133 
void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  // Emit a single-operand permutation; `control` carries the lane mask (or
  // shift/rotate count) for the pre-classified operation in ins->op().
  FloatRegister src = ToFloatRegister(ins->src());
  FloatRegister dest = ToFloatRegister(ins->output());
  SimdConstant control = ins->control();
  switch (ins->op()) {
    // For broadcast, would MOVDDUP be better than PSHUFD for the last step?
    case SimdPermuteOp::BROADCAST_8x16: {
      const SimdConstant::I8x16& mask = control.asInt8x16();
      int8_t source = mask[0];
      // AVX2 broadcasts lane 0 in a single instruction.
      if (source == 0 && Assembler::HasAVX2()) {
        masm.vbroadcastb(Operand(src), dest);
        break;
      }
      MOZ_ASSERT_IF(!Assembler::HasAVX(), src == dest);
      // Duplicate the source byte into a 16-bit lane by interleaving the
      // vector with itself, then broadcast that word and finally the dword
      // containing it.
      if (source < 8) {
        masm.interleaveLowInt8x16(src, src, dest);
      } else {
        masm.interleaveHighInt8x16(src, src, dest);
        source -= 8;
      }
      uint16_t v = uint16_t(source & 3);
      uint16_t wordMask[4] = {v, v, v, v};
      if (source < 4) {
        masm.permuteLowInt16x8(wordMask, dest, dest);
        uint32_t dwordMask[4] = {0, 0, 0, 0};
        masm.permuteInt32x4(dwordMask, dest, dest);
      } else {
        masm.permuteHighInt16x8(wordMask, dest, dest);
        uint32_t dwordMask[4] = {2, 2, 2, 2};
        masm.permuteInt32x4(dwordMask, dest, dest);
      }
      break;
    }
    case SimdPermuteOp::BROADCAST_16x8: {
      const SimdConstant::I16x8& mask = control.asInt16x8();
      int16_t source = mask[0];
      // AVX2 broadcasts lane 0 in a single instruction.
      if (source == 0 && Assembler::HasAVX2()) {
        masm.vbroadcastw(Operand(src), dest);
        break;
      }
      // Broadcast the word within its qword half, then broadcast the dword
      // that now contains it.
      uint16_t v = uint16_t(source & 3);
      uint16_t wordMask[4] = {v, v, v, v};
      if (source < 4) {
        masm.permuteLowInt16x8(wordMask, src, dest);
        uint32_t dwordMask[4] = {0, 0, 0, 0};
        masm.permuteInt32x4(dwordMask, dest, dest);
      } else {
        masm.permuteHighInt16x8(wordMask, src, dest);
        uint32_t dwordMask[4] = {2, 2, 2, 2};
        masm.permuteInt32x4(dwordMask, dest, dest);
      }
      break;
    }
    case SimdPermuteOp::MOVE: {
      masm.moveSimd128(src, dest);
      break;
    }
    case SimdPermuteOp::PERMUTE_8x16: {
      const SimdConstant::I8x16& mask = control.asInt8x16();
#  ifdef DEBUG
      // An identity mask should have been classified as MOVE upstream.
      DebugOnly<int> i;
      for (i = 0; i < 16 && mask[i] == i; i++) {
      }
      MOZ_ASSERT(i < 16, "Should have been a MOVE operation");
#  endif
      masm.permuteInt8x16(reinterpret_cast<const uint8_t*>(mask), src, dest);
      break;
    }
    case SimdPermuteOp::PERMUTE_16x8: {
#  ifdef DEBUG
      // An identity mask should have been classified as MOVE upstream.
      const SimdConstant::I16x8& mask = control.asInt16x8();
      DebugOnly<int> i;
      for (i = 0; i < 8 && mask[i] == i; i++) {
      }
      MOZ_ASSERT(i < 8, "Should have been a MOVE operation");
#  endif
      // Prefer the word-permute lowering (PSHUFLW/PSHUFHW, possibly after a
      // qword swap) when the mask decomposes that way; note each step feeds
      // the next by re-pointing `src` at `dest`.  Otherwise fall back to a
      // full byte permute with an expanded mask.
      PermuteX64I16x8Action op = CalculateX64Permute16x8(&control);
      if (op != PermuteX64I16x8Action::UNAVAILABLE) {
        const SimdConstant::I16x8& mask = control.asInt16x8();
        if (op & PermuteX64I16x8Action::SWAP_QWORDS) {
          uint32_t dwordMask[4] = {2, 3, 0, 1};
          masm.permuteInt32x4(dwordMask, src, dest);
          src = dest;
        }
        if (op & PermuteX64I16x8Action::PERM_LOW) {
          masm.permuteLowInt16x8(reinterpret_cast<const uint16_t*>(mask) + 0,
                                 src, dest);
          src = dest;
        }
        if (op & PermuteX64I16x8Action::PERM_HIGH) {
          masm.permuteHighInt16x8(reinterpret_cast<const uint16_t*>(mask) + 4,
                                  src, dest);
          src = dest;
        }
      } else {
        // Expand each word index into the pair of byte indices it covers.
        const SimdConstant::I16x8& wmask = control.asInt16x8();
        uint8_t mask[16];
        for (unsigned i = 0; i < 16; i += 2) {
          mask[i] = wmask[i / 2] * 2;
          mask[i + 1] = wmask[i / 2] * 2 + 1;
        }
        masm.permuteInt8x16(mask, src, dest);
      }
      break;
    }
    case SimdPermuteOp::PERMUTE_32x4: {
      const SimdConstant::I32x4& mask = control.asInt32x4();
      // All-zero mask is a dword broadcast; AVX2 has an instruction for it.
      if (Assembler::HasAVX2() && mask[0] == 0 && mask[1] == 0 &&
          mask[2] == 0 && mask[3] == 0) {
        masm.vbroadcastd(Operand(src), dest);
        break;
      }
#  ifdef DEBUG
      // An identity mask should have been classified as MOVE upstream.
      DebugOnly<int> i;
      for (i = 0; i < 4 && mask[i] == i; i++) {
      }
      MOZ_ASSERT(i < 4, "Should have been a MOVE operation");
#  endif
      masm.permuteInt32x4(reinterpret_cast<const uint32_t*>(mask), src, dest);
      break;
    }
    case SimdPermuteOp::ROTATE_RIGHT_8x16: {
      MOZ_ASSERT_IF(!Assembler::HasAVX(), src == dest);
      // Byte rotate implemented as a concat of the vector with itself
      // followed by a byte shift.
      int8_t count = control.asInt8x16()[0];
      MOZ_ASSERT(count > 0, "Should have been a MOVE operation");
      masm.concatAndRightShiftSimd128(src, src, dest, count);
      break;
    }
    case SimdPermuteOp::SHIFT_LEFT_8x16: {
      int8_t count = control.asInt8x16()[0];
      MOZ_ASSERT(count > 0, "Should have been a MOVE operation");
      masm.leftShiftSimd128(Imm32(count), src, dest);
      break;
    }
    case SimdPermuteOp::SHIFT_RIGHT_8x16: {
      int8_t count = control.asInt8x16()[0];
      MOZ_ASSERT(count > 0, "Should have been a MOVE operation");
      masm.rightShiftSimd128(Imm32(count), src, dest);
      break;
    }
    // Zero-extending widenings; no mask data needed.
    case SimdPermuteOp::ZERO_EXTEND_8x16_TO_16x8:
      masm.zeroExtend8x16To16x8(src, dest);
      break;
    case SimdPermuteOp::ZERO_EXTEND_8x16_TO_32x4:
      masm.zeroExtend8x16To32x4(src, dest);
      break;
    case SimdPermuteOp::ZERO_EXTEND_8x16_TO_64x2:
      masm.zeroExtend8x16To64x2(src, dest);
      break;
    case SimdPermuteOp::ZERO_EXTEND_16x8_TO_32x4:
      masm.zeroExtend16x8To32x4(src, dest);
      break;
    case SimdPermuteOp::ZERO_EXTEND_16x8_TO_64x2:
      masm.zeroExtend16x8To64x2(src, dest);
      break;
    case SimdPermuteOp::ZERO_EXTEND_32x4_TO_64x2:
      masm.zeroExtend32x4To64x2(src, dest);
      break;
    // Lane reversals.
    case SimdPermuteOp::REVERSE_16x8:
      masm.reverseInt16x8(src, dest);
      break;
    case SimdPermuteOp::REVERSE_32x4:
      masm.reverseInt32x4(src, dest);
      break;
    case SimdPermuteOp::REVERSE_64x2:
      masm.reverseInt64x2(src, dest);
      break;
    default: {
      MOZ_CRASH("Unsupported SIMD permutation operation");
    }
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}
   3310 
   3311 void CodeGenerator::visitWasmReplaceLaneSimd128(LWasmReplaceLaneSimd128* ins) {
   3312 #ifdef ENABLE_WASM_SIMD
   3313  FloatRegister lhs = ToFloatRegister(ins->lhs());
   3314  FloatRegister dest = ToFloatRegister(ins->output());
   3315  const LAllocation* rhs = ins->rhs();
   3316  uint32_t laneIndex = ins->mir()->laneIndex();
   3317 
   3318  switch (ins->mir()->simdOp()) {
   3319    case wasm::SimdOp::I8x16ReplaceLane:
   3320      masm.replaceLaneInt8x16(laneIndex, lhs, ToRegister(rhs), dest);
   3321      break;
   3322    case wasm::SimdOp::I16x8ReplaceLane:
   3323      masm.replaceLaneInt16x8(laneIndex, lhs, ToRegister(rhs), dest);
   3324      break;
   3325    case wasm::SimdOp::I32x4ReplaceLane:
   3326      masm.replaceLaneInt32x4(laneIndex, lhs, ToRegister(rhs), dest);
   3327      break;
   3328    case wasm::SimdOp::F32x4ReplaceLane:
   3329      masm.replaceLaneFloat32x4(laneIndex, lhs, ToFloatRegister(rhs), dest);
   3330      break;
   3331    case wasm::SimdOp::F64x2ReplaceLane:
   3332      masm.replaceLaneFloat64x2(laneIndex, lhs, ToFloatRegister(rhs), dest);
   3333      break;
   3334    default:
   3335      MOZ_CRASH("ReplaceLane SimdOp not implemented");
   3336  }
   3337 #else
   3338  MOZ_CRASH("No SIMD");
   3339 #endif
   3340 }
   3341 
   3342 void CodeGenerator::visitWasmReplaceInt64LaneSimd128(
   3343    LWasmReplaceInt64LaneSimd128* ins) {
   3344 #ifdef ENABLE_WASM_SIMD
   3345  MOZ_RELEASE_ASSERT(ins->mir()->simdOp() == wasm::SimdOp::I64x2ReplaceLane);
   3346  masm.replaceLaneInt64x2(ins->mir()->laneIndex(), ToFloatRegister(ins->lhs()),
   3347                          ToRegister64(ins->rhs()),
   3348                          ToFloatRegister(ins->output()));
   3349 #else
   3350  MOZ_CRASH("No SIMD");
   3351 #endif
   3352 }
   3353 
   3354 void CodeGenerator::visitWasmScalarToSimd128(LWasmScalarToSimd128* ins) {
   3355 #ifdef ENABLE_WASM_SIMD
   3356  FloatRegister dest = ToFloatRegister(ins->output());
   3357 
   3358  switch (ins->mir()->simdOp()) {
   3359    case wasm::SimdOp::I8x16Splat:
   3360      masm.splatX16(ToRegister(ins->src()), dest);
   3361      break;
   3362    case wasm::SimdOp::I16x8Splat:
   3363      masm.splatX8(ToRegister(ins->src()), dest);
   3364      break;
   3365    case wasm::SimdOp::I32x4Splat:
   3366      masm.splatX4(ToRegister(ins->src()), dest);
   3367      break;
   3368    case wasm::SimdOp::F32x4Splat:
   3369      masm.splatX4(ToFloatRegister(ins->src()), dest);
   3370      break;
   3371    case wasm::SimdOp::F64x2Splat:
   3372      masm.splatX2(ToFloatRegister(ins->src()), dest);
   3373      break;
   3374    default:
   3375      MOZ_CRASH("ScalarToSimd128 SimdOp not implemented");
   3376  }
   3377 #else
   3378  MOZ_CRASH("No SIMD");
   3379 #endif
   3380 }
   3381 
   3382 void CodeGenerator::visitWasmInt64ToSimd128(LWasmInt64ToSimd128* ins) {
   3383 #ifdef ENABLE_WASM_SIMD
   3384  Register64 src = ToRegister64(ins->src());
   3385  FloatRegister dest = ToFloatRegister(ins->output());
   3386 
   3387  switch (ins->mir()->simdOp()) {
   3388    case wasm::SimdOp::I64x2Splat:
   3389      masm.splatX2(src, dest);
   3390      break;
   3391    case wasm::SimdOp::V128Load8x8S:
   3392      masm.moveGPR64ToDouble(src, dest);
   3393      masm.widenLowInt8x16(dest, dest);
   3394      break;
   3395    case wasm::SimdOp::V128Load8x8U:
   3396      masm.moveGPR64ToDouble(src, dest);
   3397      masm.unsignedWidenLowInt8x16(dest, dest);
   3398      break;
   3399    case wasm::SimdOp::V128Load16x4S:
   3400      masm.moveGPR64ToDouble(src, dest);
   3401      masm.widenLowInt16x8(dest, dest);
   3402      break;
   3403    case wasm::SimdOp::V128Load16x4U:
   3404      masm.moveGPR64ToDouble(src, dest);
   3405      masm.unsignedWidenLowInt16x8(dest, dest);
   3406      break;
   3407    case wasm::SimdOp::V128Load32x2S:
   3408      masm.moveGPR64ToDouble(src, dest);
   3409      masm.widenLowInt32x4(dest, dest);
   3410      break;
   3411    case wasm::SimdOp::V128Load32x2U:
   3412      masm.moveGPR64ToDouble(src, dest);
   3413      masm.unsignedWidenLowInt32x4(dest, dest);
   3414      break;
   3415    default:
   3416      MOZ_CRASH("Int64ToSimd128 SimdOp not implemented");
   3417  }
   3418 #else
   3419  MOZ_CRASH("No SIMD");
   3420 #endif
   3421 }
   3422 
   3423 void CodeGenerator::visitWasmUnarySimd128(LWasmUnarySimd128* ins) {
   3424 #ifdef ENABLE_WASM_SIMD
   3425  FloatRegister src = ToFloatRegister(ins->src());
   3426  FloatRegister dest = ToFloatRegister(ins->output());
   3427 
   3428  switch (ins->mir()->simdOp()) {
   3429    case wasm::SimdOp::I8x16Neg:
   3430      masm.negInt8x16(src, dest);
   3431      break;
   3432    case wasm::SimdOp::I16x8Neg:
   3433      masm.negInt16x8(src, dest);
   3434      break;
   3435    case wasm::SimdOp::I16x8ExtendLowI8x16S:
   3436      masm.widenLowInt8x16(src, dest);
   3437      break;
   3438    case wasm::SimdOp::I16x8ExtendHighI8x16S:
   3439      masm.widenHighInt8x16(src, dest);
   3440      break;
   3441    case wasm::SimdOp::I16x8ExtendLowI8x16U:
   3442      masm.unsignedWidenLowInt8x16(src, dest);
   3443      break;
   3444    case wasm::SimdOp::I16x8ExtendHighI8x16U:
   3445      masm.unsignedWidenHighInt8x16(src, dest);
   3446      break;
   3447    case wasm::SimdOp::I32x4Neg:
   3448      masm.negInt32x4(src, dest);
   3449      break;
   3450    case wasm::SimdOp::I32x4ExtendLowI16x8S:
   3451      masm.widenLowInt16x8(src, dest);
   3452      break;
   3453    case wasm::SimdOp::I32x4ExtendHighI16x8S:
   3454      masm.widenHighInt16x8(src, dest);
   3455      break;
   3456    case wasm::SimdOp::I32x4ExtendLowI16x8U:
   3457      masm.unsignedWidenLowInt16x8(src, dest);
   3458      break;
   3459    case wasm::SimdOp::I32x4ExtendHighI16x8U:
   3460      masm.unsignedWidenHighInt16x8(src, dest);
   3461      break;
   3462    case wasm::SimdOp::I32x4TruncSatF32x4S:
   3463      masm.truncSatFloat32x4ToInt32x4(src, dest);
   3464      break;
   3465    case wasm::SimdOp::I32x4TruncSatF32x4U:
   3466      masm.unsignedTruncSatFloat32x4ToInt32x4(src, dest,
   3467                                              ToFloatRegister(ins->temp0()));
   3468      break;
   3469    case wasm::SimdOp::I64x2Neg:
   3470      masm.negInt64x2(src, dest);
   3471      break;
   3472    case wasm::SimdOp::I64x2ExtendLowI32x4S:
   3473      masm.widenLowInt32x4(src, dest);
   3474      break;
   3475    case wasm::SimdOp::I64x2ExtendHighI32x4S:
   3476      masm.widenHighInt32x4(src, dest);
   3477      break;
   3478    case wasm::SimdOp::I64x2ExtendLowI32x4U:
   3479      masm.unsignedWidenLowInt32x4(src, dest);
   3480      break;
   3481    case wasm::SimdOp::I64x2ExtendHighI32x4U:
   3482      masm.unsignedWidenHighInt32x4(src, dest);
   3483      break;
   3484    case wasm::SimdOp::F32x4Abs:
   3485      masm.absFloat32x4(src, dest);
   3486      break;
   3487    case wasm::SimdOp::F32x4Neg:
   3488      masm.negFloat32x4(src, dest);
   3489      break;
   3490    case wasm::SimdOp::F32x4Sqrt:
   3491      masm.sqrtFloat32x4(src, dest);
   3492      break;
   3493    case wasm::SimdOp::F32x4ConvertI32x4S:
   3494      masm.convertInt32x4ToFloat32x4(src, dest);
   3495      break;
   3496    case wasm::SimdOp::F32x4ConvertI32x4U:
   3497      masm.unsignedConvertInt32x4ToFloat32x4(src, dest);
   3498      break;
   3499    case wasm::SimdOp::F64x2Abs:
   3500      masm.absFloat64x2(src, dest);
   3501      break;
   3502    case wasm::SimdOp::F64x2Neg:
   3503      masm.negFloat64x2(src, dest);
   3504      break;
   3505    case wasm::SimdOp::F64x2Sqrt:
   3506      masm.sqrtFloat64x2(src, dest);
   3507      break;
   3508    case wasm::SimdOp::V128Not:
   3509      masm.bitwiseNotSimd128(src, dest);
   3510      break;
   3511    case wasm::SimdOp::I8x16Popcnt:
   3512      masm.popcntInt8x16(src, dest, ToFloatRegister(ins->temp0()));
   3513      break;
   3514    case wasm::SimdOp::I8x16Abs:
   3515      masm.absInt8x16(src, dest);
   3516      break;
   3517    case wasm::SimdOp::I16x8Abs:
   3518      masm.absInt16x8(src, dest);
   3519      break;
   3520    case wasm::SimdOp::I32x4Abs:
   3521      masm.absInt32x4(src, dest);
   3522      break;
   3523    case wasm::SimdOp::I64x2Abs:
   3524      masm.absInt64x2(src, dest);
   3525      break;
   3526    case wasm::SimdOp::F32x4Ceil:
   3527      masm.ceilFloat32x4(src, dest);
   3528      break;
   3529    case wasm::SimdOp::F32x4Floor:
   3530      masm.floorFloat32x4(src, dest);
   3531      break;
   3532    case wasm::SimdOp::F32x4Trunc:
   3533      masm.truncFloat32x4(src, dest);
   3534      break;
   3535    case wasm::SimdOp::F32x4Nearest:
   3536      masm.nearestFloat32x4(src, dest);
   3537      break;
   3538    case wasm::SimdOp::F64x2Ceil:
   3539      masm.ceilFloat64x2(src, dest);
   3540      break;
   3541    case wasm::SimdOp::F64x2Floor:
   3542      masm.floorFloat64x2(src, dest);
   3543      break;
   3544    case wasm::SimdOp::F64x2Trunc:
   3545      masm.truncFloat64x2(src, dest);
   3546      break;
   3547    case wasm::SimdOp::F64x2Nearest:
   3548      masm.nearestFloat64x2(src, dest);
   3549      break;
   3550    case wasm::SimdOp::F32x4DemoteF64x2Zero:
   3551      masm.convertFloat64x2ToFloat32x4(src, dest);
   3552      break;
   3553    case wasm::SimdOp::F64x2PromoteLowF32x4:
   3554      masm.convertFloat32x4ToFloat64x2(src, dest);
   3555      break;
   3556    case wasm::SimdOp::F64x2ConvertLowI32x4S:
   3557      masm.convertInt32x4ToFloat64x2(src, dest);
   3558      break;
   3559    case wasm::SimdOp::F64x2ConvertLowI32x4U:
   3560      masm.unsignedConvertInt32x4ToFloat64x2(src, dest);
   3561      break;
   3562    case wasm::SimdOp::I32x4TruncSatF64x2SZero:
   3563      masm.truncSatFloat64x2ToInt32x4(src, dest, ToFloatRegister(ins->temp0()));
   3564      break;
   3565    case wasm::SimdOp::I32x4TruncSatF64x2UZero:
   3566      masm.unsignedTruncSatFloat64x2ToInt32x4(src, dest,
   3567                                              ToFloatRegister(ins->temp0()));
   3568      break;
   3569    case wasm::SimdOp::I16x8ExtaddPairwiseI8x16S:
   3570      masm.extAddPairwiseInt8x16(src, dest);
   3571      break;
   3572    case wasm::SimdOp::I16x8ExtaddPairwiseI8x16U:
   3573      masm.unsignedExtAddPairwiseInt8x16(src, dest);
   3574      break;
   3575    case wasm::SimdOp::I32x4ExtaddPairwiseI16x8S:
   3576      masm.extAddPairwiseInt16x8(src, dest);
   3577      break;
   3578    case wasm::SimdOp::I32x4ExtaddPairwiseI16x8U:
   3579      masm.unsignedExtAddPairwiseInt16x8(src, dest);
   3580      break;
   3581    case wasm::SimdOp::I32x4RelaxedTruncF32x4S:
   3582      masm.truncFloat32x4ToInt32x4Relaxed(src, dest);
   3583      break;
   3584    case wasm::SimdOp::I32x4RelaxedTruncF32x4U:
   3585      masm.unsignedTruncFloat32x4ToInt32x4Relaxed(src, dest);
   3586      break;
   3587    case wasm::SimdOp::I32x4RelaxedTruncF64x2SZero:
   3588      masm.truncFloat64x2ToInt32x4Relaxed(src, dest);
   3589      break;
   3590    case wasm::SimdOp::I32x4RelaxedTruncF64x2UZero:
   3591      masm.unsignedTruncFloat64x2ToInt32x4Relaxed(src, dest);
   3592      break;
   3593    default:
   3594      MOZ_CRASH("Unary SimdOp not implemented");
   3595  }
   3596 #else
   3597  MOZ_CRASH("No SIMD");
   3598 #endif
   3599 }
   3600 
   3601 void CodeGenerator::visitWasmReduceSimd128(LWasmReduceSimd128* ins) {
   3602 #ifdef ENABLE_WASM_SIMD
   3603  FloatRegister src = ToFloatRegister(ins->src());
   3604  const LDefinition* dest = ins->output();
   3605  uint32_t imm = ins->mir()->imm();
   3606 
   3607  switch (ins->mir()->simdOp()) {
   3608    case wasm::SimdOp::V128AnyTrue:
   3609      masm.anyTrueSimd128(src, ToRegister(dest));
   3610      break;
   3611    case wasm::SimdOp::I8x16AllTrue:
   3612      masm.allTrueInt8x16(src, ToRegister(dest));
   3613      break;
   3614    case wasm::SimdOp::I16x8AllTrue:
   3615      masm.allTrueInt16x8(src, ToRegister(dest));
   3616      break;
   3617    case wasm::SimdOp::I32x4AllTrue:
   3618      masm.allTrueInt32x4(src, ToRegister(dest));
   3619      break;
   3620    case wasm::SimdOp::I64x2AllTrue:
   3621      masm.allTrueInt64x2(src, ToRegister(dest));
   3622      break;
   3623    case wasm::SimdOp::I8x16Bitmask:
   3624      masm.bitmaskInt8x16(src, ToRegister(dest));
   3625      break;
   3626    case wasm::SimdOp::I16x8Bitmask:
   3627      masm.bitmaskInt16x8(src, ToRegister(dest));
   3628      break;
   3629    case wasm::SimdOp::I32x4Bitmask:
   3630      masm.bitmaskInt32x4(src, ToRegister(dest));
   3631      break;
   3632    case wasm::SimdOp::I64x2Bitmask:
   3633      masm.bitmaskInt64x2(src, ToRegister(dest));
   3634      break;
   3635    case wasm::SimdOp::I8x16ExtractLaneS:
   3636      masm.extractLaneInt8x16(imm, src, ToRegister(dest));
   3637      break;
   3638    case wasm::SimdOp::I8x16ExtractLaneU:
   3639      masm.unsignedExtractLaneInt8x16(imm, src, ToRegister(dest));
   3640      break;
   3641    case wasm::SimdOp::I16x8ExtractLaneS:
   3642      masm.extractLaneInt16x8(imm, src, ToRegister(dest));
   3643      break;
   3644    case wasm::SimdOp::I16x8ExtractLaneU:
   3645      masm.unsignedExtractLaneInt16x8(imm, src, ToRegister(dest));
   3646      break;
   3647    case wasm::SimdOp::I32x4ExtractLane:
   3648      masm.extractLaneInt32x4(imm, src, ToRegister(dest));
   3649      break;
   3650    case wasm::SimdOp::F32x4ExtractLane:
   3651      masm.extractLaneFloat32x4(imm, src, ToFloatRegister(dest));
   3652      break;
   3653    case wasm::SimdOp::F64x2ExtractLane:
   3654      masm.extractLaneFloat64x2(imm, src, ToFloatRegister(dest));
   3655      break;
   3656    default:
   3657      MOZ_CRASH("Reduce SimdOp not implemented");
   3658  }
   3659 #else
   3660  MOZ_CRASH("No SIMD");
   3661 #endif
   3662 }
   3663 
// Fused vector-reduction-plus-branch: instead of materializing a boolean in
// a GPR and testing it, set the CPU flags directly from the vector test and
// branch on them. NOTE: the flags set by vptest/bitwiseTestSimd128 must
// reach emitBranch unmodified, so no flag-clobbering instruction may be
// emitted between the test and the branch in any case below.
void CodeGenerator::visitWasmReduceAndBranchSimd128(
   LWasmReduceAndBranchSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
 FloatRegister src = ToFloatRegister(ins->src());

 switch (ins->simdOp()) {
   case wasm::SimdOp::V128AnyTrue:
     // Set the zero flag if all of the lanes are zero, and branch on that.
     masm.vptest(src, src);
     emitBranch(Assembler::NotEqual, ins->ifTrue(), ins->ifFalse());
     break;
   case wasm::SimdOp::I8x16AllTrue:
   case wasm::SimdOp::I16x8AllTrue:
   case wasm::SimdOp::I32x4AllTrue:
   case wasm::SimdOp::I64x2AllTrue: {
     // Compare all lanes to zero, set the zero flag if none of the lanes are
     // zero, and branch on that.
     // |tmp| borrows the scratch SIMD register for the duration of this
     // case; it holds zero, then the per-lane equals-zero mask.
     ScratchSimd128Scope tmp(masm);
     masm.vpxor(tmp, tmp, tmp);
     // Inner switch picks the lane-width-appropriate compare; the outer
     // cases only differ in that one instruction.
     switch (ins->simdOp()) {
       case wasm::SimdOp::I8x16AllTrue:
         masm.vpcmpeqb(Operand(src), tmp, tmp);
         break;
       case wasm::SimdOp::I16x8AllTrue:
         masm.vpcmpeqw(Operand(src), tmp, tmp);
         break;
       case wasm::SimdOp::I32x4AllTrue:
         masm.vpcmpeqd(Operand(src), tmp, tmp);
         break;
       case wasm::SimdOp::I64x2AllTrue:
         masm.vpcmpeqq(Operand(src), tmp, tmp);
         break;
       default:
         // Unreachable: the outer case labels restrict the op set.
         MOZ_CRASH();
     }
     // tmp is all-zero iff every lane of src was nonzero (all true).
     masm.vptest(tmp, tmp);
     emitBranch(Assembler::Equal, ins->ifTrue(), ins->ifFalse());
     break;
   }
   case wasm::SimdOp::I16x8Bitmask: {
     // Branch on "bitmask != 0", i.e. any lane has its sign bit (0x8000)
     // set; no need to materialize the bitmask itself.
     masm.bitwiseTestSimd128(SimdConstant::SplatX8(0x8000), src);
     emitBranch(Assembler::NotEqual, ins->ifTrue(), ins->ifFalse());
     break;
   }
   default:
     MOZ_CRASH("Reduce-and-branch SimdOp not implemented");
 }
#else
 MOZ_CRASH("No SIMD");
#endif
}
   3715 
   3716 void CodeGenerator::visitWasmReduceSimd128ToInt64(
   3717    LWasmReduceSimd128ToInt64* ins) {
   3718 #ifdef ENABLE_WASM_SIMD
   3719  FloatRegister src = ToFloatRegister(ins->src());
   3720  Register64 dest = ToOutRegister64(ins);
   3721  uint32_t imm = ins->mir()->imm();
   3722 
   3723  switch (ins->mir()->simdOp()) {
   3724    case wasm::SimdOp::I64x2ExtractLane:
   3725      masm.extractLaneInt64x2(imm, src, dest);
   3726      break;
   3727    default:
   3728      MOZ_CRASH("Reduce SimdOp not implemented");
   3729  }
   3730 #else
   3731  MOZ_CRASH("No SIMD");
   3732 #endif
   3733 }
   3734 
// Load a single lane (1/2/4/8 bytes) from wasm memory into the vector held
// in |value|, leaving the other lanes untouched. NOTE: each masm.append()
// records masm.currentOffset() as the faulting instruction for the wasm
// trap-handling machinery, so the memory-touching instruction must be
// emitted IMMEDIATELY after the append call — do not insert code between
// them.
void CodeGenerator::visitWasmLoadLaneSimd128(LWasmLoadLaneSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
 const MWasmLoadLaneSimd128* mir = ins->mir();
 const wasm::MemoryAccessDesc& access = mir->access();

 // The constant offset must be small enough that an out-of-bounds access
 // still faults inside the guard pages.
 access.assertOffsetInGuardPages();
 uint32_t offset = access.offset32();

 // |value| is both the vector input and the output (lane inserted in place).
 const LAllocation* value = ins->src();
 Operand srcAddr = toMemoryAccessOperand(ins, offset);

 switch (mir->laneSize()) {
   case 1: {
     masm.append(access, wasm::TrapMachineInsn::Load8,
                 FaultingCodeOffset(masm.currentOffset()));
     masm.vpinsrb(mir->laneIndex(), srcAddr, ToFloatRegister(value),
                  ToFloatRegister(value));
     break;
   }
   case 2: {
     masm.append(access, wasm::TrapMachineInsn::Load16,
                 FaultingCodeOffset(masm.currentOffset()));
     masm.vpinsrw(mir->laneIndex(), srcAddr, ToFloatRegister(value),
                  ToFloatRegister(value));
     break;
   }
   case 4: {
     masm.append(access, wasm::TrapMachineInsn::Load32,
                 FaultingCodeOffset(masm.currentOffset()));
     // vinsertps encodes the destination lane in bits 4-5 of its immediate,
     // hence the << 4.
     masm.vinsertps(mir->laneIndex() << 4, srcAddr, ToFloatRegister(value),
                    ToFloatRegister(value));
     break;
   }
   case 8: {
     masm.append(access, wasm::TrapMachineInsn::Load64,
                 FaultingCodeOffset(masm.currentOffset()));
     // No general 64-bit lane insert; use the low/high quadword moves.
     if (mir->laneIndex() == 0) {
       masm.vmovlps(srcAddr, ToFloatRegister(value), ToFloatRegister(value));
     } else {
       masm.vmovhps(srcAddr, ToFloatRegister(value), ToFloatRegister(value));
     }
     break;
   }
   default:
     MOZ_CRASH("Unsupported load lane size");
 }
#else
 MOZ_CRASH("No SIMD");
#endif
}
   3785 
// Store a single lane (1/2/4/8 bytes) of the vector in |src| to wasm
// memory. NOTE: each masm.append() records masm.currentOffset() as the
// faulting instruction for the wasm trap-handling machinery, so the
// memory-touching instruction must be emitted IMMEDIATELY after the append
// call — do not insert code between them.
void CodeGenerator::visitWasmStoreLaneSimd128(LWasmStoreLaneSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
 const MWasmStoreLaneSimd128* mir = ins->mir();
 const wasm::MemoryAccessDesc& access = mir->access();

 // The constant offset must be small enough that an out-of-bounds access
 // still faults inside the guard pages.
 access.assertOffsetInGuardPages();
 uint32_t offset = access.offset32();

 const LAllocation* src = ins->src();
 Operand destAddr = toMemoryAccessOperand(ins, offset);

 switch (mir->laneSize()) {
   case 1: {
     masm.append(access, wasm::TrapMachineInsn::Store8,
                 FaultingCodeOffset(masm.currentOffset()));
     masm.vpextrb(mir->laneIndex(), ToFloatRegister(src), destAddr);
     break;
   }
   case 2: {
     masm.append(access, wasm::TrapMachineInsn::Store16,
                 FaultingCodeOffset(masm.currentOffset()));
     masm.vpextrw(mir->laneIndex(), ToFloatRegister(src), destAddr);
     break;
   }
   case 4: {
     masm.append(access, wasm::TrapMachineInsn::Store32,
                 FaultingCodeOffset(masm.currentOffset()));
     unsigned lane = mir->laneIndex();
     // Lane 0 is just the low 32 bits, so a plain scalar store suffices;
     // other lanes need the extract-and-store form.
     if (lane == 0) {
       masm.vmovss(ToFloatRegister(src), destAddr);
     } else {
       masm.vextractps(lane, ToFloatRegister(src), destAddr);
     }
     break;
   }
   case 8: {
     masm.append(access, wasm::TrapMachineInsn::Store64,
                 FaultingCodeOffset(masm.currentOffset()));
     // No general 64-bit lane extract-to-memory; use the low/high quadword
     // moves.
     if (mir->laneIndex() == 0) {
       masm.vmovlps(ToFloatRegister(src), destAddr);
     } else {
       masm.vmovhps(ToFloatRegister(src), destAddr);
     }
     break;
   }
   default:
     MOZ_CRASH("Unsupported store lane size");
 }
#else
 MOZ_CRASH("No SIMD");
#endif
}
   3838 
   3839 }  // namespace jit
   3840 }  // namespace js