tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

Lowering-x86-shared.cpp (60454B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "jit/x86-shared/Lowering-x86-shared.h"
      8 
      9 #include "mozilla/MathAlgorithms.h"
     10 
     11 #include "jit/Lowering.h"
     12 #include "jit/MIR-wasm.h"
     13 #include "jit/MIR.h"
     14 #include "wasm/WasmFeatures.h"  // for wasm::ReportSimdAnalysis
     15 
     16 #include "jit/shared/Lowering-shared-inl.h"
     17 
     18 using namespace js;
     19 using namespace js::jit;
     20 
     21 using mozilla::Abs;
     22 using mozilla::FloorLog2;
     23 using mozilla::Maybe;
     24 using mozilla::Nothing;
     25 using mozilla::Some;
     26 
     27 LTableSwitch* LIRGeneratorX86Shared::newLTableSwitch(
     28    const LAllocation& in, const LDefinition& inputCopy) {
     29  return new (alloc()) LTableSwitch(in, inputCopy, temp());
     30 }
     31 
     32 LTableSwitchV* LIRGeneratorX86Shared::newLTableSwitchV(
     33    const LBoxAllocation& in) {
     34  return new (alloc()) LTableSwitchV(in, temp(), tempDouble(), temp());
     35 }
     36 
     37 LUse LIRGeneratorX86Shared::useShiftRegister(MDefinition* mir) {
     38  // Unless BMI2 is available, the shift register must be ecx. x86 can't shift a
     39  // non-ecx register.
     40  if (Assembler::HasBMI2()) {
     41    return useRegister(mir);
     42  }
     43  return useFixed(mir, ecx);
     44 }
     45 
     46 LUse LIRGeneratorX86Shared::useShiftRegisterAtStart(MDefinition* mir) {
     47  // Unless BMI2 is available, the shift register must be ecx. x86 can't shift a
     48  // non-ecx register.
     49  if (Assembler::HasBMI2()) {
     50    return useRegisterAtStart(mir);
     51  }
     52  return useFixedAtStart(mir, ecx);
     53 }
     54 
     55 LDefinition LIRGeneratorX86Shared::tempShift() {
     56  // Unless BMI2 is available, the shift register must be ecx. x86 can't shift a
     57  // non-ecx register.
     58  if (Assembler::HasBMI2()) {
     59    return temp();
     60  }
     61  return tempFixed(ecx);
     62 }
     63 
// Lower a shift or rotate with an int32 result. lhs is always consumed
// at-start and (except for the BMI2 shift case) the output reuses it,
// matching the two-address x86 shift/rotate encodings.
void LIRGeneratorX86Shared::lowerForShift(LInstructionHelper<1, 2, 0>* ins,
                                          MDefinition* mir, MDefinition* lhs,
                                          MDefinition* rhs) {
  ins->setOperand(0, useRegisterAtStart(lhs));

  if (rhs->isConstant()) {
    // Constant count: encoded as an immediate, no register constraint.
    ins->setOperand(1, useOrConstantAtStart(rhs));
    defineReuseInput(ins, mir, 0);
  } else if (!mir->isRotate()) {
    if (Assembler::HasBMI2()) {
      // BMI2 shifts (SHLX et al.) are three-address: no input reuse and
      // no ecx pinning needed.
      ins->setOperand(1, useRegisterAtStart(rhs));
      define(ins, mir);
    } else {
      // Legacy shifts need the count in ecx (via useShiftRegister). When
      // lhs and rhs may lower to the same LIR node, the count must be
      // at-start so both constraints can be satisfied together.
      ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
                             ? useShiftRegister(rhs)
                             : useShiftRegisterAtStart(rhs));
      defineReuseInput(ins, mir, 0);
    }
  } else {
    // Rotates always take the count in ecx, even with BMI2 available.
    ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
                           ? useFixed(rhs, ecx)
                           : useFixedAtStart(rhs, ecx));
    defineReuseInput(ins, mir, 0);
  }
}
     89 
     90 void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 1, 0>* ins,
     91                                        MDefinition* mir, MDefinition* input) {
     92  ins->setOperand(0, useRegisterAtStart(input));
     93  defineReuseInput(ins, mir, 0);
     94 }
     95 
     96 void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins,
     97                                        MDefinition* mir, MDefinition* lhs,
     98                                        MDefinition* rhs) {
     99  if (MOZ_UNLIKELY(mir->isAdd() && mir->type() == MIRType::Int32 &&
    100                   rhs->isConstant() && !mir->toAdd()->fallible())) {
    101    // Special case instruction that is widely used in Wasm during address
    102    // calculation. And x86 platform has LEA instruction for it.
    103    // See CodeGenerator::visitAddI for codegen.
    104    ins->setOperand(0, useRegisterAtStart(lhs));
    105    ins->setOperand(1, useOrConstantAtStart(rhs));
    106    define(ins, mir);
    107    return;
    108  }
    109 
    110  ins->setOperand(0, useRegisterAtStart(lhs));
    111  ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
    112                         ? useOrConstant(rhs)
    113                         : useOrConstantAtStart(rhs));
    114  defineReuseInput(ins, mir, 0);
    115 }
    116 
    117 void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 1, 0>* ins,
    118                                        MDefinition* mir, MDefinition* input) {
    119  // Without AVX, we'll need to use the x86 encodings where the input must be
    120  // the same location as the output.
    121  if (!Assembler::HasAVX()) {
    122    ins->setOperand(0, useRegisterAtStart(input));
    123    defineReuseInput(ins, mir, 0);
    124  } else {
    125    ins->setOperand(0, useRegisterAtStart(input));
    126    define(ins, mir);
    127  }
    128 }
    129 
    130 void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 0>* ins,
    131                                        MDefinition* mir, MDefinition* lhs,
    132                                        MDefinition* rhs) {
    133  // Without AVX, we'll need to use the x86 encodings where one of the
    134  // inputs must be the same location as the output.
    135  if (!Assembler::HasAVX()) {
    136    ins->setOperand(0, useRegisterAtStart(lhs));
    137    ins->setOperand(
    138        1, willHaveDifferentLIRNodes(lhs, rhs) ? use(rhs) : useAtStart(rhs));
    139    defineReuseInput(ins, mir, 0);
    140  } else {
    141    ins->setOperand(0, useRegisterAtStart(lhs));
    142    ins->setOperand(1, useAtStart(rhs));
    143    define(ins, mir);
    144  }
    145 }
    146 
// Lower an int32 multiply. A snapshot is attached whenever the MIR node can
// bail out (overflow / negative-zero checks).
void LIRGeneratorX86Shared::lowerMulI(MMul* mul, MDefinition* lhs,
                                      MDefinition* rhs) {
  if (rhs->isConstant()) {
    // Constant rhs: no reuse constraint on the output and no lhs copy —
    // the third (lhsCopy) operand is left as a bogus LAllocation().
    auto* lir = new (alloc()) LMulI(useRegisterAtStart(lhs),
                                    useOrConstantAtStart(rhs), LAllocation());
    if (mul->fallible()) {
      assignSnapshot(lir, mul->bailoutKind());
    }
    define(lir, mul);
    return;
  }

  // Note: If we need a negative zero check, lhs is used twice.
  LAllocation lhsCopy = mul->canBeNegativeZero() ? use(lhs) : LAllocation();
  // Two-address multiply: the output reuses lhs, so rhs can only share
  // lhs's at-start position when both may lower to the same LIR node.
  LMulI* lir = new (alloc())
      LMulI(useRegisterAtStart(lhs),
            willHaveDifferentLIRNodes(lhs, rhs) ? use(rhs) : useAtStart(rhs),
            lhsCopy);
  if (mul->fallible()) {
    assignSnapshot(lir, mul->bailoutKind());
  }
  defineReuseInput(lir, mul, 0);
}
    170 
// Lower a signed int32 division. Constant divisors are strength-reduced:
// powers of two become shifts, and other constants use a reciprocal
// multiplication (LDivConstantI); only the general case emits idiv.
void LIRGeneratorX86Shared::lowerDivI(MDiv* div) {
  // Division instructions are slow. Division by constant denominators can be
  // rewritten to use other instructions.
  if (div->rhs()->isConstant()) {
    int32_t rhs = div->rhs()->toConstant()->toInt32();

    // Division by powers of two can be done by shifting, and division by
    // other numbers can be done by a reciprocal multiplication technique.
    int32_t shift = FloorLog2(Abs(rhs));
    if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
      LAllocation lhs = useRegisterAtStart(div->lhs());

      // When truncated with maybe a non-zero remainder, we have to round the
      // result toward 0. This requires an extra register to round up/down
      // whether the left-hand-side is signed.
      //
      // If the numerator might be signed, and needs adjusting, then an extra
      // lhs copy is needed to round the result of the integer division towards
      // zero.
      //
      // Otherwise the numerator is unsigned, so does not need adjusting.
      bool needRoundNeg = div->canBeNegativeDividend() && div->isTruncated();
      LAllocation lhsCopy =
          needRoundNeg ? useRegister(div->lhs()) : LAllocation();

      auto* lir = new (alloc()) LDivPowTwoI(lhs, lhsCopy, shift, rhs < 0);
      if (div->fallible()) {
        assignSnapshot(lir, div->bailoutKind());
      }
      defineReuseInput(lir, div, 0);
      return;
    }

#ifdef JS_CODEGEN_X86
    // On x86-32 the constant-division lowering pins eax as a temp and
    // produces its result in edx.
    auto* lir = new (alloc())
        LDivConstantI(useRegister(div->lhs()), tempFixed(eax), rhs);
    if (div->fallible()) {
      assignSnapshot(lir, div->bailoutKind());
    }
    defineFixed(lir, div, LAllocation(AnyRegister(edx)));
#else
    // On x64 no fixed registers are needed; any temp/output will do.
    auto* lir =
        new (alloc()) LDivConstantI(useRegister(div->lhs()), temp(), rhs);
    if (div->fallible()) {
      assignSnapshot(lir, div->bailoutKind());
    }
    define(lir, div);
#endif
    return;
  }

  // General case: idiv takes the dividend in eax, clobbers edx (remainder),
  // and leaves the quotient in eax.
  auto* lir = new (alloc()) LDivI(useFixedAtStart(div->lhs(), eax),
                                  useRegister(div->rhs()), tempFixed(edx));
  if (div->fallible()) {
    assignSnapshot(lir, div->bailoutKind());
  }
  defineFixed(lir, div, LAllocation(AnyRegister(eax)));
}
    229 
// Lower a signed int32 modulus. Mirrors lowerDivI: power-of-two divisors use
// a mask-based lowering, other constants use LModConstantI, and the general
// case emits idiv, taking the remainder from edx.
void LIRGeneratorX86Shared::lowerModI(MMod* mod) {
  if (mod->rhs()->isConstant()) {
    int32_t rhs = mod->rhs()->toConstant()->toInt32();
    int32_t shift = FloorLog2(Abs(rhs));
    if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
      auto* lir =
          new (alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
      if (mod->fallible()) {
        assignSnapshot(lir, mod->bailoutKind());
      }
      defineReuseInput(lir, mod, 0);
      return;
    }

#ifdef JS_CODEGEN_X86
    // On x86-32 the constant-modulus lowering pins edx as a temp and
    // produces its result in eax.
    auto* lir = new (alloc())
        LModConstantI(useRegister(mod->lhs()), tempFixed(edx), rhs);
    if (mod->fallible()) {
      assignSnapshot(lir, mod->bailoutKind());
    }
    defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
#else
    // On x64 no fixed registers are needed.
    auto* lir =
        new (alloc()) LModConstantI(useRegister(mod->lhs()), temp(), rhs);
    if (mod->fallible()) {
      assignSnapshot(lir, mod->bailoutKind());
    }
    define(lir, mod);
#endif
    return;
  }

  // General case: idiv takes the dividend in eax (clobbered, hence the eax
  // temp) and leaves the remainder in edx, which becomes the fixed output.
  auto* lir = new (alloc()) LModI(useFixedAtStart(mod->lhs(), eax),
                                  useRegister(mod->rhs()), tempFixed(eax));
  if (mod->fallible()) {
    assignSnapshot(lir, mod->bailoutKind());
  }
  defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
}
    269 
    270 void LIRGeneratorX86Shared::lowerWasmSelectI(MWasmSelect* select) {
    271  auto* lir = new (alloc())
    272      LWasmSelect(useRegisterAtStart(select->trueExpr()),
    273                  useAny(select->falseExpr()), useRegister(select->condExpr()));
    274  defineReuseInput(lir, select, LWasmSelect::TrueExprIndex);
    275 }
    276 
    277 void LIRGeneratorX86Shared::lowerWasmSelectI64(MWasmSelect* select) {
    278  auto* lir = new (alloc()) LWasmSelectI64(
    279      useInt64RegisterAtStart(select->trueExpr()),
    280      useInt64(select->falseExpr()), useRegister(select->condExpr()));
    281  defineInt64ReuseInput(lir, select, LWasmSelectI64::TrueExprIndex);
    282 }
    283 
// Lower an asm.js heap load. The bounds-check limit and (on x86) the memory
// base are only materialized when actually needed; absent operands are left
// as bogus LAllocation()s.
void LIRGenerator::visitAsmJSLoadHeap(MAsmJSLoadHeap* ins) {
  MDefinition* base = ins->base();
  MOZ_ASSERT(base->type() == MIRType::Int32);

  MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
  MOZ_ASSERT_IF(ins->needsBoundsCheck(),
                boundsCheckLimit->type() == MIRType::Int32);

  // For simplicity, require a register if we're going to emit a bounds-check
  // branch, so that we don't have special cases for constants. This should
  // only happen in rare constant-folding cases, since asm.js sets the
  // minimum heap size when the heap is accessed via a constant index.
  LAllocation baseAlloc = ins->needsBoundsCheck()
                              ? useRegisterAtStart(base)
                              : useRegisterOrZeroAtStart(base);

  LAllocation limitAlloc = ins->needsBoundsCheck()
                               ? useRegisterAtStart(boundsCheckLimit)
                               : LAllocation();
  LAllocation memoryBaseAlloc = ins->hasMemoryBase()
                                    ? useRegisterAtStart(ins->memoryBase())
                                    : LAllocation();

  auto* lir =
      new (alloc()) LAsmJSLoadHeap(baseAlloc, limitAlloc, memoryBaseAlloc);
  define(lir, ins);
}
    311 
// Lower an asm.js heap store. Operand setup mirrors visitAsmJSLoadHeap; the
// switch below additionally constrains the value operand per element type.
void LIRGenerator::visitAsmJSStoreHeap(MAsmJSStoreHeap* ins) {
  MDefinition* base = ins->base();
  MOZ_ASSERT(base->type() == MIRType::Int32);

  MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
  MOZ_ASSERT_IF(ins->needsBoundsCheck(),
                boundsCheckLimit->type() == MIRType::Int32);

  // For simplicity, require a register if we're going to emit a bounds-check
  // branch, so that we don't have special cases for constants. This should
  // only happen in rare constant-folding cases, since asm.js sets the
  // minimum heap size when the heap is accessed via a constant index.
  LAllocation baseAlloc = ins->needsBoundsCheck()
                              ? useRegisterAtStart(base)
                              : useRegisterOrZeroAtStart(base);

  LAllocation limitAlloc = ins->needsBoundsCheck()
                               ? useRegisterAtStart(boundsCheckLimit)
                               : LAllocation();
  LAllocation memoryBaseAlloc = ins->hasMemoryBase()
                                    ? useRegisterAtStart(ins->memoryBase())
                                    : LAllocation();

  LAsmJSStoreHeap* lir = nullptr;
  switch (ins->access().type()) {
    case Scalar::Int8:
    case Scalar::Uint8:
#ifdef JS_CODEGEN_X86
      // See comment for LIRGeneratorX86::useByteOpRegister.
      lir = new (alloc()) LAsmJSStoreHeap(
          baseAlloc, useFixed(ins->value(), eax), limitAlloc, memoryBaseAlloc);
      break;
#endif
      // On non-x86 builds, Int8/Uint8 intentionally fall through to the
      // general case below: all registers have byte encodings there.
    case Scalar::Int16:
    case Scalar::Uint16:
    case Scalar::Int32:
    case Scalar::Uint32:
    case Scalar::Float32:
    case Scalar::Float64:
      // For now, don't allow constant values. The immediate operand affects
      // instruction layout which affects patching.
      lir = new (alloc())
          LAsmJSStoreHeap(baseAlloc, useRegisterAtStart(ins->value()),
                          limitAlloc, memoryBaseAlloc);
      break;
    case Scalar::Int64:
    case Scalar::Simd128:
      MOZ_CRASH("NYI");
    case Scalar::Uint8Clamped:
    case Scalar::BigInt64:
    case Scalar::BigUint64:
    case Scalar::Float16:
    case Scalar::MaxTypedArrayViewType:
      MOZ_CRASH("unexpected array type");
  }
  add(lir, ins);
}
    369 
// Lower an unsigned int32 division. Same strength-reduction structure as
// lowerDivI, but without the negative-dividend adjustments: power-of-two
// divisors become shifts, other constants use LUDivConstant, and the general
// case emits div with eax/edx fixed.
void LIRGeneratorX86Shared::lowerUDiv(MDiv* div) {
  if (div->rhs()->isConstant()) {
    // NOTE: the result of toInt32 is coerced to uint32_t.
    uint32_t rhs = div->rhs()->toConstant()->toInt32();
    int32_t shift = FloorLog2(rhs);

    if (rhs != 0 && uint32_t(1) << shift == rhs) {
      // Unsigned, so no rounding adjustment (lhsCopy) and never negative
      // (negativeDivisor == false).
      auto* lir = new (alloc()) LDivPowTwoI(useRegisterAtStart(div->lhs()),
                                            LAllocation(), shift, false);
      if (div->fallible()) {
        assignSnapshot(lir, div->bailoutKind());
      }
      defineReuseInput(lir, div, 0);
    } else {
#ifdef JS_CODEGEN_X86
      // On x86-32 the constant-division lowering pins eax as a temp and
      // produces its result in edx.
      auto* lir = new (alloc())
          LUDivConstant(useRegister(div->lhs()), tempFixed(eax), rhs);
      if (div->fallible()) {
        assignSnapshot(lir, div->bailoutKind());
      }
      defineFixed(lir, div, LAllocation(AnyRegister(edx)));
#else
      // On x64 no fixed registers are needed.
      auto* lir =
          new (alloc()) LUDivConstant(useRegister(div->lhs()), temp(), rhs);
      if (div->fallible()) {
        assignSnapshot(lir, div->bailoutKind());
      }
      define(lir, div);
#endif
    }
    return;
  }

  // General case: div takes the dividend in eax, clobbers edx (remainder),
  // and leaves the quotient in eax.
  auto* lir = new (alloc()) LUDiv(useFixedAtStart(div->lhs(), eax),
                                  useRegister(div->rhs()), tempFixed(edx));
  if (div->fallible()) {
    assignSnapshot(lir, div->bailoutKind());
  }
  defineFixed(lir, div, LAllocation(AnyRegister(eax)));
}
    410 
// Lower an unsigned int32 modulus. Mirrors lowerUDiv: power-of-two divisors
// use the mask-based LModPowTwoI, other constants use LUModConstant, and the
// general case emits div, taking the remainder from edx.
void LIRGeneratorX86Shared::lowerUMod(MMod* mod) {
  if (mod->rhs()->isConstant()) {
    // NOTE: the result of toInt32 is coerced to uint32_t.
    uint32_t rhs = mod->rhs()->toConstant()->toInt32();
    int32_t shift = FloorLog2(rhs);

    if (rhs != 0 && uint32_t(1) << shift == rhs) {
      auto* lir =
          new (alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
      if (mod->fallible()) {
        assignSnapshot(lir, mod->bailoutKind());
      }
      defineReuseInput(lir, mod, 0);
    } else {
#ifdef JS_CODEGEN_X86
      // On x86-32 the constant-modulus lowering pins edx as a temp and
      // produces its result in eax.
      auto* lir = new (alloc())
          LUModConstant(useRegister(mod->lhs()), tempFixed(edx), rhs);
      if (mod->fallible()) {
        assignSnapshot(lir, mod->bailoutKind());
      }
      defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
#else
      // On x64 no fixed registers are needed.
      auto* lir =
          new (alloc()) LUModConstant(useRegister(mod->lhs()), temp(), rhs);
      if (mod->fallible()) {
        assignSnapshot(lir, mod->bailoutKind());
      }
      define(lir, mod);
#endif
    }
    return;
  }

  // General case: div takes the dividend in eax (clobbered, hence the eax
  // temp) and leaves the remainder in edx, which becomes the fixed output.
  auto* lir = new (alloc()) LUMod(useFixedAtStart(mod->lhs(), eax),
                                  useRegister(mod->rhs()), tempFixed(eax));
  if (mod->fallible()) {
    assignSnapshot(lir, mod->bailoutKind());
  }
  defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
}
    450 
// Lower an unsigned right shift whose result is consumed as a double
// (int32 >>> count, then converted). The shift itself follows the usual
// x86 count constraints; a temp holds the intermediate integer value.
void LIRGeneratorX86Shared::lowerUrshD(MUrsh* mir) {
  MDefinition* lhs = mir->lhs();
  MDefinition* rhs = mir->rhs();

  MOZ_ASSERT(lhs->type() == MIRType::Int32);
  MOZ_ASSERT(rhs->type() == MIRType::Int32);
  MOZ_ASSERT(mir->type() == MIRType::Double);

  LUse lhsUse = useRegisterAtStart(lhs);
  LAllocation rhsAlloc;
  LDefinition tempDef;
  if (rhs->isConstant()) {
    // Immediate count: shift a copy of lhs in the temp.
    rhsAlloc = useOrConstant(rhs);
    tempDef = tempCopy(lhs, 0);
  } else if (Assembler::HasBMI2()) {
    // BMI2 shifts are three-address, so any temp works and the count may
    // be at-start.
    rhsAlloc = useRegisterAtStart(rhs);
    tempDef = temp();
  } else {
    // Legacy shifts: count pinned to ecx (useShiftRegister), shift a copy
    // of lhs in the temp.
    rhsAlloc = useShiftRegister(rhs);
    tempDef = tempCopy(lhs, 0);
  }

  auto* lir = new (alloc()) LUrshD(lhsUse, rhsAlloc, tempDef);
  define(lir, mir);
}
    476 
    477 void LIRGeneratorX86Shared::lowerPowOfTwoI(MPow* mir) {
    478  int32_t base = mir->input()->toConstant()->toInt32();
    479  MDefinition* power = mir->power();
    480 
    481  auto* lir = new (alloc()) LPowOfTwoI(useShiftRegister(power), base);
    482  assignSnapshot(lir, mir->bailoutKind());
    483  define(lir, mir);
    484 }
    485 
    486 void LIRGeneratorX86Shared::lowerBigIntPtrLsh(MBigIntPtrLsh* ins) {
    487  auto* lir = new (alloc()) LBigIntPtrLsh(
    488      useRegister(ins->lhs()), useRegister(ins->rhs()), temp(), tempShift());
    489  assignSnapshot(lir, ins->bailoutKind());
    490  define(lir, ins);
    491 }
    492 
    493 void LIRGeneratorX86Shared::lowerBigIntPtrRsh(MBigIntPtrRsh* ins) {
    494  auto* lir = new (alloc()) LBigIntPtrRsh(
    495      useRegister(ins->lhs()), useRegister(ins->rhs()), temp(), tempShift());
    496  assignSnapshot(lir, ins->bailoutKind());
    497  define(lir, ins);
    498 }
    499 
// Lower a typed-array compareExchange (CMPXCHG-style). CMPXCHG requires the
// expected old value in eax and returns the observed value in eax; the
// register choices below follow from that. useI386ByteRegisters is true on
// x86-32, where only eax/ebx/ecx/edx have byte encodings.
void LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement(
    MCompareExchangeTypedArrayElement* ins, bool useI386ByteRegisters) {
  MOZ_ASSERT(!Scalar::isFloatingType(ins->arrayType()));
  MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
  MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);

  const LUse elements = useRegister(ins->elements());
  const LAllocation index =
      useRegisterOrIndexConstant(ins->index(), ins->arrayType());

  // If the target is a floating register then we need a temp at the
  // lower level; that temp must be eax.
  //
  // Otherwise the target (if used) is an integer register, which
  // must be eax.  If the target is not used the machine code will
  // still clobber eax, so just pretend it's used.
  //
  // oldval must be in a register.
  //
  // newval must be in a register.  If the source is a byte array
  // then newval must be a register that has a byte size: on x86
  // this must be ebx, ecx, or edx (eax is taken for the output).
  //
  // Bug #1077036 describes some further optimization opportunities.

  bool fixedOutput = false;
  LDefinition tempDef = LDefinition::BogusTemp();
  LAllocation newval;
  if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
    // Uint32 result observed as a double: eax is only a temp here, and the
    // (floating) output register is left unconstrained.
    tempDef = tempFixed(eax);
    newval = useRegister(ins->newval());
  } else {
    // Integer result: the output itself is pinned to eax below.
    fixedOutput = true;
    if (useI386ByteRegisters && ins->isByteArray()) {
      newval = useFixed(ins->newval(), ebx);
    } else {
      newval = useRegister(ins->newval());
    }
  }

  const LAllocation oldval = useRegister(ins->oldval());

  LCompareExchangeTypedArrayElement* lir =
      new (alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval,
                                                      newval, tempDef);

  if (fixedOutput) {
    defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
  } else {
    define(lir, ins);
  }
}
    552 
// Lower a typed-array atomic exchange (XCHG-style). useI386ByteRegisters is
// true on x86-32, where only eax/ebx/ecx/edx have byte encodings; in that
// case the output of a byte exchange is pinned to eax.
void LIRGeneratorX86Shared::lowerAtomicExchangeTypedArrayElement(
    MAtomicExchangeTypedArrayElement* ins, bool useI386ByteRegisters) {
  MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32);

  MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
  MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);

  const LUse elements = useRegister(ins->elements());
  const LAllocation index =
      useRegisterOrIndexConstant(ins->index(), ins->arrayType());
  const LAllocation value = useRegister(ins->value());

  // The underlying instruction is XCHG, which can operate on any
  // register.
  //
  // If the target is a floating register (for Uint32) then we need
  // a temp into which to exchange.
  //
  // If the source is a byte array then we need a register that has
  // a byte size; in this case -- on x86 only -- pin the output to
  // an appropriate register and use that as a temp in the back-end.

  LDefinition tempDef = LDefinition::BogusTemp();
  if (ins->arrayType() == Scalar::Uint32) {
    // Uint32 result observed as a double: exchange via an integer temp.
    MOZ_ASSERT(ins->type() == MIRType::Double);
    tempDef = temp();
  }

  LAtomicExchangeTypedArrayElement* lir = new (alloc())
      LAtomicExchangeTypedArrayElement(elements, index, value, tempDef);

  if (useI386ByteRegisters && ins->isByteArray()) {
    defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
  } else {
    define(lir, ins);
  }
}
    590 
// Lower a typed-array atomic read-modify-write (add/sub/and/or/xor). The
// lowering strategy depends on whether the result is used, on the operation
// (xadd vs. cmpxchg loop), on the element size, and on whether we are on
// x86-32 (useI386ByteRegisters), where only eax/ebx/ecx/edx have byte
// encodings. See the large comments below for the exact register scheme.
void LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(
    MAtomicTypedArrayElementBinop* ins, bool useI386ByteRegisters) {
  MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
  MOZ_ASSERT(!Scalar::isFloatingType(ins->arrayType()));
  MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
  MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);

  const LUse elements = useRegister(ins->elements());
  const LAllocation index =
      useRegisterOrIndexConstant(ins->index(), ins->arrayType());

  // Case 1: the result of the operation is not used.
  //
  // We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND,
  // LOCK OR, or LOCK XOR.  We can do this even for the Uint32 case.

  if (ins->isForEffect()) {
    LAllocation value;
    if (useI386ByteRegisters && ins->isByteArray() &&
        !ins->value()->isConstant()) {
      // Byte store on x86-32: the value needs a byte-encodable register.
      value = useFixed(ins->value(), ebx);
    } else {
      value = useRegisterOrConstant(ins->value());
    }

    LAtomicTypedArrayElementBinopForEffect* lir = new (alloc())
        LAtomicTypedArrayElementBinopForEffect(elements, index, value);

    add(lir, ins);
    return;
  }

  // Case 2: the result of the operation is used.
  //
  // For ADD and SUB we'll use XADD:
  //
  //    movl       src, output
  //    lock xaddl output, mem
  //
  // For the 8-bit variants XADD needs a byte register for the output.
  //
  // For AND/OR/XOR we need to use a CMPXCHG loop:
  //
  //    movl          *mem, eax
  // L: mov           eax, temp
  //    andl          src, temp
  //    lock cmpxchg  temp, mem  ; reads eax also
  //    jnz           L
  //    ; result in eax
  //
  // Note the placement of L, cmpxchg will update eax with *mem if
  // *mem does not have the expected value, so reloading it at the
  // top of the loop would be redundant.
  //
  // If the array is not a uint32 array then:
  //  - eax should be the output (one result of the cmpxchg)
  //  - there is a temp, which must have a byte register if
  //    the array has 1-byte elements elements
  //
  // If the array is a uint32 array then:
  //  - eax is the first temp
  //  - we also need a second temp
  //
  // There are optimization opportunities:
  //  - better register allocation in the x86 8-bit case, Bug #1077036.

  // bitOp is true for AND/OR/XOR (cmpxchg loop), false for ADD/SUB (xadd).
  bool bitOp =
      !(ins->operation() == AtomicOp::Add || ins->operation() == AtomicOp::Sub);
  bool fixedOutput = true;
  bool reuseInput = false;
  LDefinition tempDef1 = LDefinition::BogusTemp();
  LDefinition tempDef2 = LDefinition::BogusTemp();
  LAllocation value;

  if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
    // Uint32 observed as a double: the (floating) output is unconstrained;
    // eax is demoted to a temp for the cmpxchg loop.
    value = useRegisterOrConstant(ins->value());
    fixedOutput = false;
    if (bitOp) {
      tempDef1 = tempFixed(eax);
      tempDef2 = temp();
    } else {
      tempDef1 = temp();
    }
  } else if (useI386ByteRegisters && ins->isByteArray()) {
    // x86-32 byte array: non-constant values need a byte-encodable
    // register (ebx); the cmpxchg temp likewise (ecx).
    if (ins->value()->isConstant()) {
      value = useRegisterOrConstant(ins->value());
    } else {
      value = useFixed(ins->value(), ebx);
    }
    if (bitOp) {
      tempDef1 = tempFixed(ecx);
    }
  } else if (bitOp) {
    // cmpxchg loop: output in eax, plus one unconstrained temp.
    value = useRegisterOrConstant(ins->value());
    tempDef1 = temp();
  } else if (ins->value()->isConstant()) {
    // xadd with a constant source: output unconstrained.
    fixedOutput = false;
    value = useRegisterOrConstant(ins->value());
  } else {
    // xadd with a register source: the output overwrites the value input.
    fixedOutput = false;
    reuseInput = true;
    value = useRegisterAtStart(ins->value());
  }

  LAtomicTypedArrayElementBinop* lir = new (alloc())
      LAtomicTypedArrayElementBinop(elements, index, value, tempDef1, tempDef2);

  if (fixedOutput) {
    defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
  } else if (reuseInput) {
    defineReuseInput(lir, ins, LAtomicTypedArrayElementBinop::ValueIndex);
  } else {
    define(lir, ins);
  }
}
    706 
// Lower copysign for float32/double. Allocation mirrors the binary
// lowerForFPU above, except that rhs must also be in an FP register (not
// merely "any use"), since both operands feed FP bitwise operations.
void LIRGenerator::visitCopySign(MCopySign* ins) {
  MDefinition* lhs = ins->lhs();
  MDefinition* rhs = ins->rhs();

  MOZ_ASSERT(IsFloatingPointType(lhs->type()));
  MOZ_ASSERT(lhs->type() == rhs->type());
  MOZ_ASSERT(lhs->type() == ins->type());

  LInstructionHelper<1, 2, 0>* lir;
  if (lhs->type() == MIRType::Double) {
    lir = new (alloc()) LCopySignD();
  } else {
    lir = new (alloc()) LCopySignF();
  }

  // As lowerForFPU, but we want rhs to be in a FP register too.
  lir->setOperand(0, useRegisterAtStart(lhs));
  if (!Assembler::HasAVX()) {
    // Two-address SSE: the output reuses lhs; rhs may only be at-start when
    // lhs and rhs may lower to the same LIR node.
    lir->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
                           ? useRegister(rhs)
                           : useRegisterAtStart(rhs));
    defineReuseInput(lir, ins, 0);
  } else {
    // Three-address AVX: no reuse constraint on the output.
    lir->setOperand(1, useRegisterAtStart(rhs));
    define(lir, ins);
  }
}
    734 
    735 // These lowerings are really x86-shared but some Masm APIs are not yet
    736 // available on x86.
    737 
    738 // Ternary and binary operators require the dest register to be the same as
    739 // their first input register, leading to a pattern of useRegisterAtStart +
    740 // defineReuseInput.
    741 
// Lower a wasm SIMD ternary operation (bitselect, the relaxed fused
// multiply-add family, dot-product-accumulate, and relaxed lane select) to
// LIR, choosing operand and temp constraints that minimize register moves on
// x86/x64.  Which operand is reused as the output depends on the operation.
void LIRGenerator::visitWasmTernarySimd128(MWasmTernarySimd128* ins) {
#ifdef ENABLE_WASM_SIMD
 MOZ_ASSERT(ins->v0()->type() == MIRType::Simd128);
 MOZ_ASSERT(ins->v1()->type() == MIRType::Simd128);
 MOZ_ASSERT(ins->v2()->type() == MIRType::Simd128);
 MOZ_ASSERT(ins->type() == MIRType::Simd128);

 switch (ins->simdOp()) {
   case wasm::SimdOp::V128Bitselect: {
     // Enforcing lhs == output avoids one setup move.  We would like to also
     // enforce merging the control with the temp (with
     // useRegisterAtStart(control) and tempCopy()), but the register allocator
     // ignores those constraints at present.
     auto* lir = new (alloc()) LWasmTernarySimd128(
         useRegisterAtStart(ins->v0()), useRegister(ins->v1()),
         useRegister(ins->v2()), tempSimd128(), ins->simdOp());
     defineReuseInput(lir, ins, LWasmTernarySimd128::V0Index);
     break;
   }
   case wasm::SimdOp::F32x4RelaxedMadd:
   case wasm::SimdOp::F32x4RelaxedNmadd:
   case wasm::SimdOp::F64x2RelaxedMadd:
   case wasm::SimdOp::F64x2RelaxedNmadd: {
     // The accumulator operand v2 doubles as the output register.
     auto* lir = new (alloc())
         LWasmTernarySimd128(useRegister(ins->v0()), useRegister(ins->v1()),
                             useRegisterAtStart(ins->v2()),
                             LDefinition::BogusTemp(), ins->simdOp());
     defineReuseInput(lir, ins, LWasmTernarySimd128::V2Index);
     break;
   }
   case wasm::SimdOp::I32x4RelaxedDotI8x16I7x16AddS: {
     // As above: the dot product is added into v2, so v2 is the output.
     auto* lir = new (alloc())
         LWasmTernarySimd128(useRegister(ins->v0()), useRegister(ins->v1()),
                             useRegisterAtStart(ins->v2()),
                             LDefinition::BogusTemp(), ins->simdOp());
     defineReuseInput(lir, ins, LWasmTernarySimd128::V2Index);
     break;
   }
   case wasm::SimdOp::I8x16RelaxedLaneSelect:
   case wasm::SimdOp::I16x8RelaxedLaneSelect:
   case wasm::SimdOp::I32x4RelaxedLaneSelect:
   case wasm::SimdOp::I64x2RelaxedLaneSelect: {
     if (Assembler::HasAVX()) {
       // Non-destructive AVX encodings: any registers will do, define a
       // fresh output.
       auto* lir = new (alloc()) LWasmTernarySimd128(
           useRegisterAtStart(ins->v0()), useRegisterAtStart(ins->v1()),
           useRegisterAtStart(ins->v2()), LDefinition::BogusTemp(),
           ins->simdOp());
       define(lir, ins);
     } else {
       // Without AVX the blend's mask must live in the fixed register vmm0
       // and the instruction overwrites v1, which is reused as the output.
       auto* lir = new (alloc()) LWasmTernarySimd128(
           useRegister(ins->v0()), useRegisterAtStart(ins->v1()),
           useFixed(ins->v2(), vmm0), LDefinition::BogusTemp(), ins->simdOp());
       defineReuseInput(lir, ins, LWasmTernarySimd128::V1Index);
     }
     break;
   }
   default:
     MOZ_CRASH("NYI");
 }
#else
 MOZ_CRASH("No SIMD");
#endif
}
    805 
// Lower a wasm SIMD binary operation to LIR.  Three concerns are handled
// here: (1) commutative reordering and x86-specific operand swaps (some
// comparisons only exist in one direction in the hardware, so e.g. LtS is
// rewritten as GtS with swapped operands); (2) allocation of scratch vector
// registers for the few ops that need them; (3) choosing between the AVX
// three-operand form (fresh output register) and the SSE two-operand form
// (output reuses the lhs register).
void LIRGenerator::visitWasmBinarySimd128(MWasmBinarySimd128* ins) {
#ifdef ENABLE_WASM_SIMD
 MDefinition* lhs = ins->lhs();
 MDefinition* rhs = ins->rhs();
 wasm::SimdOp op = ins->simdOp();

 MOZ_ASSERT(lhs->type() == MIRType::Simd128);
 MOZ_ASSERT(rhs->type() == MIRType::Simd128);
 MOZ_ASSERT(ins->type() == MIRType::Simd128);

 // Note MWasmBinarySimd128::foldsTo has already specialized operations that
 // have a constant operand, so this takes care of more general cases of
 // reordering, see ReorderCommutative.
 if (ins->isCommutative()) {
   ReorderCommutative(&lhs, &rhs, ins);
 }

 // Swap operands and change operation if necessary, these are all x86/x64
 // dependent transformations.  Except where noted, this is about avoiding
 // unnecessary moves and fixups in the code generator macros.
 bool swap = false;
 switch (op) {
   case wasm::SimdOp::V128AndNot: {
     // Code generation requires the operands to be reversed.
     swap = true;
     break;
   }
   case wasm::SimdOp::I8x16LtS: {
     swap = true;
     op = wasm::SimdOp::I8x16GtS;
     break;
   }
   case wasm::SimdOp::I8x16GeS: {
     swap = true;
     op = wasm::SimdOp::I8x16LeS;
     break;
   }
   case wasm::SimdOp::I16x8LtS: {
     swap = true;
     op = wasm::SimdOp::I16x8GtS;
     break;
   }
   case wasm::SimdOp::I16x8GeS: {
     swap = true;
     op = wasm::SimdOp::I16x8LeS;
     break;
   }
   case wasm::SimdOp::I32x4LtS: {
     swap = true;
     op = wasm::SimdOp::I32x4GtS;
     break;
   }
   case wasm::SimdOp::I32x4GeS: {
     swap = true;
     op = wasm::SimdOp::I32x4LeS;
     break;
   }
   case wasm::SimdOp::F32x4Gt: {
     swap = true;
     op = wasm::SimdOp::F32x4Lt;
     break;
   }
   case wasm::SimdOp::F32x4Ge: {
     swap = true;
     op = wasm::SimdOp::F32x4Le;
     break;
   }
   case wasm::SimdOp::F64x2Gt: {
     swap = true;
     op = wasm::SimdOp::F64x2Lt;
     break;
   }
   case wasm::SimdOp::F64x2Ge: {
     swap = true;
     op = wasm::SimdOp::F64x2Le;
     break;
   }
   case wasm::SimdOp::F32x4PMin:
   case wasm::SimdOp::F32x4PMax:
   case wasm::SimdOp::F64x2PMin:
   case wasm::SimdOp::F64x2PMax: {
     // Code generation requires the operations to be reversed (the rhs is the
     // output register).
     swap = true;
     break;
   }
   default:
     break;
 }
 if (swap) {
   MDefinition* tmp = lhs;
   lhs = rhs;
   rhs = tmp;
 }

 // Allocate temp registers
 LDefinition tempReg0 = LDefinition::BogusTemp();
 LDefinition tempReg1 = LDefinition::BogusTemp();
 switch (op) {
   case wasm::SimdOp::I64x2Mul:
     tempReg0 = tempSimd128();
     break;
   case wasm::SimdOp::F32x4Min:
   case wasm::SimdOp::F32x4Max:
   case wasm::SimdOp::F64x2Min:
   case wasm::SimdOp::F64x2Max:
     tempReg0 = tempSimd128();
     tempReg1 = tempSimd128();
     break;
   case wasm::SimdOp::I64x2LtS:
   case wasm::SimdOp::I64x2GtS:
   case wasm::SimdOp::I64x2LeS:
   case wasm::SimdOp::I64x2GeS:
     // The compareForOrderingInt64x2AVX implementation does not require
     // temps but needs SSE4.2 support. Checking if both AVX and SSE4.2
     // are enabled.
     if (!(Assembler::HasAVX() && Assembler::HasSSE42())) {
       tempReg0 = tempSimd128();
       tempReg1 = tempSimd128();
     }
     break;
   default:
     break;
 }

 // For binary ops, without AVX support, the Masm API is usually
 // (rhs, lhsDest) and requires AtStart+ReuseInput for the lhs.
 //
 // For a few ops, the API is actually (rhsDest, lhs) and the rules are the
 // same but the reversed.  We swapped operands above; they will be swapped
 // again in the code generator to emit the right code.
 //
 // If AVX support is enabled, some binary ops can use output as destination,
 // useRegisterAtStart is applied for both operands and no need for ReuseInput.

 switch (op) {
   case wasm::SimdOp::I8x16AvgrU:
   case wasm::SimdOp::I16x8AvgrU:
   case wasm::SimdOp::I8x16Add:
   case wasm::SimdOp::I8x16AddSatS:
   case wasm::SimdOp::I8x16AddSatU:
   case wasm::SimdOp::I8x16Sub:
   case wasm::SimdOp::I8x16SubSatS:
   case wasm::SimdOp::I8x16SubSatU:
   case wasm::SimdOp::I16x8Mul:
   case wasm::SimdOp::I16x8MinS:
   case wasm::SimdOp::I16x8MinU:
   case wasm::SimdOp::I16x8MaxS:
   case wasm::SimdOp::I16x8MaxU:
   case wasm::SimdOp::I32x4Add:
   case wasm::SimdOp::I32x4Sub:
   case wasm::SimdOp::I32x4Mul:
   case wasm::SimdOp::I32x4MinS:
   case wasm::SimdOp::I32x4MinU:
   case wasm::SimdOp::I32x4MaxS:
   case wasm::SimdOp::I32x4MaxU:
   case wasm::SimdOp::I64x2Add:
   case wasm::SimdOp::I64x2Sub:
   case wasm::SimdOp::I64x2Mul:
   case wasm::SimdOp::F32x4Add:
   case wasm::SimdOp::F32x4Sub:
   case wasm::SimdOp::F32x4Mul:
   case wasm::SimdOp::F32x4Div:
   case wasm::SimdOp::F64x2Add:
   case wasm::SimdOp::F64x2Sub:
   case wasm::SimdOp::F64x2Mul:
   case wasm::SimdOp::F64x2Div:
   case wasm::SimdOp::F32x4Eq:
   case wasm::SimdOp::F32x4Ne:
   case wasm::SimdOp::F32x4Lt:
   case wasm::SimdOp::F32x4Le:
   case wasm::SimdOp::F64x2Eq:
   case wasm::SimdOp::F64x2Ne:
   case wasm::SimdOp::F64x2Lt:
   case wasm::SimdOp::F64x2Le:
   case wasm::SimdOp::F32x4PMin:
   case wasm::SimdOp::F32x4PMax:
   case wasm::SimdOp::F64x2PMin:
   case wasm::SimdOp::F64x2PMax:
   case wasm::SimdOp::I8x16Swizzle:
   case wasm::SimdOp::I8x16RelaxedSwizzle:
   case wasm::SimdOp::I8x16Eq:
   case wasm::SimdOp::I8x16Ne:
   case wasm::SimdOp::I8x16GtS:
   case wasm::SimdOp::I8x16LeS:
   case wasm::SimdOp::I8x16LtU:
   case wasm::SimdOp::I8x16GtU:
   case wasm::SimdOp::I8x16LeU:
   case wasm::SimdOp::I8x16GeU:
   case wasm::SimdOp::I16x8Eq:
   case wasm::SimdOp::I16x8Ne:
   case wasm::SimdOp::I16x8GtS:
   case wasm::SimdOp::I16x8LeS:
   case wasm::SimdOp::I16x8LtU:
   case wasm::SimdOp::I16x8GtU:
   case wasm::SimdOp::I16x8LeU:
   case wasm::SimdOp::I16x8GeU:
   case wasm::SimdOp::I32x4Eq:
   case wasm::SimdOp::I32x4Ne:
   case wasm::SimdOp::I32x4GtS:
   case wasm::SimdOp::I32x4LeS:
   case wasm::SimdOp::I32x4LtU:
   case wasm::SimdOp::I32x4GtU:
   case wasm::SimdOp::I32x4LeU:
   case wasm::SimdOp::I32x4GeU:
   case wasm::SimdOp::I64x2Eq:
   case wasm::SimdOp::I64x2Ne:
   case wasm::SimdOp::I64x2LtS:
   case wasm::SimdOp::I64x2GtS:
   case wasm::SimdOp::I64x2LeS:
   case wasm::SimdOp::I64x2GeS:
   case wasm::SimdOp::V128And:
   case wasm::SimdOp::V128Or:
   case wasm::SimdOp::V128Xor:
   case wasm::SimdOp::V128AndNot:
   case wasm::SimdOp::F32x4Min:
   case wasm::SimdOp::F32x4Max:
   case wasm::SimdOp::F64x2Min:
   case wasm::SimdOp::F64x2Max:
   case wasm::SimdOp::I8x16NarrowI16x8S:
   case wasm::SimdOp::I8x16NarrowI16x8U:
   case wasm::SimdOp::I16x8NarrowI32x4S:
   case wasm::SimdOp::I16x8NarrowI32x4U:
   case wasm::SimdOp::I32x4DotI16x8S:
   case wasm::SimdOp::I16x8ExtmulLowI8x16S:
   case wasm::SimdOp::I16x8ExtmulHighI8x16S:
   case wasm::SimdOp::I16x8ExtmulLowI8x16U:
   case wasm::SimdOp::I16x8ExtmulHighI8x16U:
   case wasm::SimdOp::I32x4ExtmulLowI16x8S:
   case wasm::SimdOp::I32x4ExtmulHighI16x8S:
   case wasm::SimdOp::I32x4ExtmulLowI16x8U:
   case wasm::SimdOp::I32x4ExtmulHighI16x8U:
   case wasm::SimdOp::I64x2ExtmulLowI32x4S:
   case wasm::SimdOp::I64x2ExtmulHighI32x4S:
   case wasm::SimdOp::I64x2ExtmulLowI32x4U:
   case wasm::SimdOp::I64x2ExtmulHighI32x4U:
   case wasm::SimdOp::I16x8Q15MulrSatS:
   case wasm::SimdOp::F32x4RelaxedMin:
   case wasm::SimdOp::F32x4RelaxedMax:
   case wasm::SimdOp::F64x2RelaxedMin:
   case wasm::SimdOp::F64x2RelaxedMax:
   case wasm::SimdOp::I16x8RelaxedQ15MulrS:
   case wasm::SimdOp::I16x8RelaxedDotI8x16I7x16S:
   case wasm::SimdOp::MozPMADDUBSW:
     if (isThreeOpAllowed()) {
       auto* lir = new (alloc())
           LWasmBinarySimd128(useRegisterAtStart(lhs), useRegisterAtStart(rhs),
                              tempReg0, tempReg1, op);
       define(lir, ins);
       break;
     }
     [[fallthrough]];
   default: {
     // Two-operand (SSE) form: the output clobbers the lhs register, so the
     // lhs is used at start and reused as the definition.
     LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
     LAllocation rhsAlloc = willHaveDifferentLIRNodes(lhs, rhs)
                                ? useRegister(rhs)
                                : useRegisterAtStart(rhs);
     auto* lir = new (alloc())
         LWasmBinarySimd128(lhsDestAlloc, rhsAlloc, tempReg0, tempReg1, op);
     defineReuseInput(lir, ins, LWasmBinarySimd128::LhsIndex);
     break;
   }
 }
#else
 MOZ_CRASH("No SIMD");
#endif
}
   1073 
   1074 #ifdef ENABLE_WASM_SIMD
   1075 bool MWasmTernarySimd128::specializeBitselectConstantMaskAsShuffle(
   1076    int8_t shuffle[16]) {
   1077  if (simdOp() != wasm::SimdOp::V128Bitselect) {
   1078    return false;
   1079  }
   1080 
   1081  // Optimization when control vector is a mask with all 0 or all 1 per lane.
   1082  // On x86, there is no bitselect, blend operations will be a win,
   1083  // e.g. via PBLENDVB or PBLENDW.
   1084  SimdConstant constant = static_cast<MWasmFloatConstant*>(v2())->toSimd128();
   1085  const SimdConstant::I8x16& bytes = constant.asInt8x16();
   1086  for (int8_t i = 0; i < 16; i++) {
   1087    if (bytes[i] == -1) {
   1088      shuffle[i] = i;
   1089    } else if (bytes[i] == 0) {
   1090      shuffle[i] = i + 16;
   1091    } else {
   1092      return false;
   1093    }
   1094  }
   1095  return true;
   1096 }
// Whether the bitselect's control operand (v2) is the result of a SIMD
// comparison.  Comparisons produce per-lane boolean masks (all zeros or all
// ones in each lane), which is the precondition that makes relaxing the
// bitselect into a lane-select valid.  Returns false when v2 is not a
// (possibly constant-rhs) binary SIMD node or is not a comparison op.
bool MWasmTernarySimd128::canRelaxBitselect() {
 wasm::SimdOp simdOp;
 if (v2()->isWasmBinarySimd128()) {
   simdOp = v2()->toWasmBinarySimd128()->simdOp();
 } else if (v2()->isWasmBinarySimd128WithConstant()) {
   simdOp = v2()->toWasmBinarySimd128WithConstant()->simdOp();
 } else {
   return false;
 }
 // All integer and floating-point comparison operations qualify.
 switch (simdOp) {
   case wasm::SimdOp::I8x16Eq:
   case wasm::SimdOp::I8x16Ne:
   case wasm::SimdOp::I8x16GtS:
   case wasm::SimdOp::I8x16GeS:
   case wasm::SimdOp::I8x16LtS:
   case wasm::SimdOp::I8x16LeS:
   case wasm::SimdOp::I8x16GtU:
   case wasm::SimdOp::I8x16GeU:
   case wasm::SimdOp::I8x16LtU:
   case wasm::SimdOp::I8x16LeU:
   case wasm::SimdOp::I16x8Eq:
   case wasm::SimdOp::I16x8Ne:
   case wasm::SimdOp::I16x8GtS:
   case wasm::SimdOp::I16x8GeS:
   case wasm::SimdOp::I16x8LtS:
   case wasm::SimdOp::I16x8LeS:
   case wasm::SimdOp::I16x8GtU:
   case wasm::SimdOp::I16x8GeU:
   case wasm::SimdOp::I16x8LtU:
   case wasm::SimdOp::I16x8LeU:
   case wasm::SimdOp::I32x4Eq:
   case wasm::SimdOp::I32x4Ne:
   case wasm::SimdOp::I32x4GtS:
   case wasm::SimdOp::I32x4GeS:
   case wasm::SimdOp::I32x4LtS:
   case wasm::SimdOp::I32x4LeS:
   case wasm::SimdOp::I32x4GtU:
   case wasm::SimdOp::I32x4GeU:
   case wasm::SimdOp::I32x4LtU:
   case wasm::SimdOp::I32x4LeU:
   case wasm::SimdOp::I64x2Eq:
   case wasm::SimdOp::I64x2Ne:
   case wasm::SimdOp::I64x2GtS:
   case wasm::SimdOp::I64x2GeS:
   case wasm::SimdOp::I64x2LtS:
   case wasm::SimdOp::I64x2LeS:
   case wasm::SimdOp::F32x4Eq:
   case wasm::SimdOp::F32x4Ne:
   case wasm::SimdOp::F32x4Gt:
   case wasm::SimdOp::F32x4Ge:
   case wasm::SimdOp::F32x4Lt:
   case wasm::SimdOp::F32x4Le:
   case wasm::SimdOp::F64x2Eq:
   case wasm::SimdOp::F64x2Ne:
   case wasm::SimdOp::F64x2Gt:
   case wasm::SimdOp::F64x2Ge:
   case wasm::SimdOp::F64x2Lt:
   case wasm::SimdOp::F64x2Le:
     return true;
   default:
     break;
 }
 return false;
}
   1161 
// On this backend the PMADDUBSW-based lowering is always permitted; SSE3
// availability is asserted (not checked) since it is presumed a baseline
// requirement here.
bool MWasmBinarySimd128::canPmaddubsw() {
 MOZ_ASSERT(Assembler::HasSSE3());
 return true;
}
   1166 #endif
   1167 
// Whether this binary operation may be specialized to a form that takes its
// (constant) rhs as an embedded SimdConstant rather than a register, i.e.
// whether the lowering may select LWasmBinarySimd128WithConstant.  Returns
// true only for ops with a single-instruction constant-rhs implementation.
bool MWasmBinarySimd128::specializeForConstantRhs() {
 // The order follows MacroAssembler.h, generally
 switch (simdOp()) {
   // Operations implemented by a single native instruction where it is
   // plausible that the rhs (after commutation if available) could be a
   // constant.
   //
   // Swizzle is not here because it was handled earlier in the pipeline.
   //
   // Integer compares >= and < are not here because they are not supported in
   // the hardware.
   //
   // Floating compares are not here because our patching machinery can't
   // handle them yet.
   //
   // Floating-point min and max (including pmin and pmax) are not here because
   // they are not straightforward to implement.
   case wasm::SimdOp::I8x16Add:
   case wasm::SimdOp::I16x8Add:
   case wasm::SimdOp::I32x4Add:
   case wasm::SimdOp::I64x2Add:
   case wasm::SimdOp::I8x16Sub:
   case wasm::SimdOp::I16x8Sub:
   case wasm::SimdOp::I32x4Sub:
   case wasm::SimdOp::I64x2Sub:
   case wasm::SimdOp::I16x8Mul:
   case wasm::SimdOp::I32x4Mul:
   case wasm::SimdOp::I8x16AddSatS:
   case wasm::SimdOp::I8x16AddSatU:
   case wasm::SimdOp::I16x8AddSatS:
   case wasm::SimdOp::I16x8AddSatU:
   case wasm::SimdOp::I8x16SubSatS:
   case wasm::SimdOp::I8x16SubSatU:
   case wasm::SimdOp::I16x8SubSatS:
   case wasm::SimdOp::I16x8SubSatU:
   case wasm::SimdOp::I8x16MinS:
   case wasm::SimdOp::I8x16MinU:
   case wasm::SimdOp::I16x8MinS:
   case wasm::SimdOp::I16x8MinU:
   case wasm::SimdOp::I32x4MinS:
   case wasm::SimdOp::I32x4MinU:
   case wasm::SimdOp::I8x16MaxS:
   case wasm::SimdOp::I8x16MaxU:
   case wasm::SimdOp::I16x8MaxS:
   case wasm::SimdOp::I16x8MaxU:
   case wasm::SimdOp::I32x4MaxS:
   case wasm::SimdOp::I32x4MaxU:
   case wasm::SimdOp::V128And:
   case wasm::SimdOp::V128Or:
   case wasm::SimdOp::V128Xor:
   case wasm::SimdOp::I8x16Eq:
   case wasm::SimdOp::I8x16Ne:
   case wasm::SimdOp::I8x16GtS:
   case wasm::SimdOp::I8x16LeS:
   case wasm::SimdOp::I16x8Eq:
   case wasm::SimdOp::I16x8Ne:
   case wasm::SimdOp::I16x8GtS:
   case wasm::SimdOp::I16x8LeS:
   case wasm::SimdOp::I32x4Eq:
   case wasm::SimdOp::I32x4Ne:
   case wasm::SimdOp::I32x4GtS:
   case wasm::SimdOp::I32x4LeS:
   case wasm::SimdOp::I64x2Mul:
   case wasm::SimdOp::F32x4Eq:
   case wasm::SimdOp::F32x4Ne:
   case wasm::SimdOp::F32x4Lt:
   case wasm::SimdOp::F32x4Le:
   case wasm::SimdOp::F64x2Eq:
   case wasm::SimdOp::F64x2Ne:
   case wasm::SimdOp::F64x2Lt:
   case wasm::SimdOp::F64x2Le:
   case wasm::SimdOp::I32x4DotI16x8S:
   case wasm::SimdOp::F32x4Add:
   case wasm::SimdOp::F64x2Add:
   case wasm::SimdOp::F32x4Sub:
   case wasm::SimdOp::F64x2Sub:
   case wasm::SimdOp::F32x4Div:
   case wasm::SimdOp::F64x2Div:
   case wasm::SimdOp::F32x4Mul:
   case wasm::SimdOp::F64x2Mul:
   case wasm::SimdOp::I8x16NarrowI16x8S:
   case wasm::SimdOp::I8x16NarrowI16x8U:
   case wasm::SimdOp::I16x8NarrowI32x4S:
   case wasm::SimdOp::I16x8NarrowI32x4U:
     return true;
   default:
     return false;
 }
}
   1257 
   1258 void LIRGenerator::visitWasmBinarySimd128WithConstant(
   1259    MWasmBinarySimd128WithConstant* ins) {
   1260 #ifdef ENABLE_WASM_SIMD
   1261  MDefinition* lhs = ins->lhs();
   1262 
   1263  MOZ_ASSERT(lhs->type() == MIRType::Simd128);
   1264  MOZ_ASSERT(ins->type() == MIRType::Simd128);
   1265 
   1266  // Allocate temp registers
   1267  LDefinition tempReg = LDefinition::BogusTemp();
   1268  switch (ins->simdOp()) {
   1269    case wasm::SimdOp::I64x2Mul:
   1270      tempReg = tempSimd128();
   1271      break;
   1272    default:
   1273      break;
   1274  }
   1275 
   1276  if (isThreeOpAllowed()) {
   1277    // The non-destructive versions of instructions will be available
   1278    // when AVX is enabled.
   1279    LAllocation lhsAlloc = useRegisterAtStart(lhs);
   1280    auto* lir = new (alloc())
   1281        LWasmBinarySimd128WithConstant(lhsAlloc, tempReg, ins->rhs());
   1282    define(lir, ins);
   1283  } else {
   1284    // Always beneficial to reuse the lhs register here, see discussion in
   1285    // visitWasmBinarySimd128() and also code in specializeForConstantRhs().
   1286    LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
   1287    auto* lir = new (alloc())
   1288        LWasmBinarySimd128WithConstant(lhsDestAlloc, tempReg, ins->rhs());
   1289    defineReuseInput(lir, ins, LWasmBinarySimd128WithConstant::LhsIndex);
   1290  }
   1291 #else
   1292  MOZ_CRASH("No SIMD");
   1293 #endif
   1294 }
   1295 
// Lower a SIMD shift.  A constant shift count is masked by (lane width - 1)
// as wasm requires; a maximal arithmetic right shift is strength-reduced to a
// sign-replication node; other constant shifts become
// LWasmConstantShiftSimd128, and non-constant counts take the variable-shift
// path, which for some ops requires a scratch vector register.
void LIRGenerator::visitWasmShiftSimd128(MWasmShiftSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
 MDefinition* lhs = ins->lhs();
 MDefinition* rhs = ins->rhs();

 MOZ_ASSERT(lhs->type() == MIRType::Simd128);
 MOZ_ASSERT(rhs->type() == MIRType::Int32);
 MOZ_ASSERT(ins->type() == MIRType::Simd128);

 if (rhs->isConstant()) {
   // Mask is the lane width in bits minus one.
   int32_t shiftCountMask;
   switch (ins->simdOp()) {
     case wasm::SimdOp::I8x16Shl:
     case wasm::SimdOp::I8x16ShrU:
     case wasm::SimdOp::I8x16ShrS:
       shiftCountMask = 7;
       break;
     case wasm::SimdOp::I16x8Shl:
     case wasm::SimdOp::I16x8ShrU:
     case wasm::SimdOp::I16x8ShrS:
       shiftCountMask = 15;
       break;
     case wasm::SimdOp::I32x4Shl:
     case wasm::SimdOp::I32x4ShrU:
     case wasm::SimdOp::I32x4ShrS:
       shiftCountMask = 31;
       break;
     case wasm::SimdOp::I64x2Shl:
     case wasm::SimdOp::I64x2ShrU:
     case wasm::SimdOp::I64x2ShrS:
       shiftCountMask = 63;
       break;
     default:
       MOZ_CRASH("Unexpected shift operation");
   }

   int32_t shiftCount = rhs->toConstant()->toInt32() & shiftCountMask;
   if (shiftCount == shiftCountMask) {
     // Check if possible to apply sign replication optimization.
     // For some ops the input shall be reused.
     switch (ins->simdOp()) {
       case wasm::SimdOp::I8x16ShrS: {
         auto* lir =
             new (alloc()) LWasmSignReplicationSimd128(useRegister(lhs));
         define(lir, ins);
         return;
       }
       case wasm::SimdOp::I16x8ShrS:
       case wasm::SimdOp::I32x4ShrS:
       case wasm::SimdOp::I64x2ShrS: {
         auto* lir = new (alloc())
             LWasmSignReplicationSimd128(useRegisterAtStart(lhs));
         if (isThreeOpAllowed()) {
           define(lir, ins);
         } else {
           // For non-AVX, it is always beneficial to reuse the input.
           defineReuseInput(lir, ins, LWasmSignReplicationSimd128::SrcIndex);
         }
         return;
       }
       default:
         break;
     }
   }

#  ifdef DEBUG
   js::wasm::ReportSimdAnalysis("shift -> constant shift");
#  endif
   auto* lir = new (alloc())
       LWasmConstantShiftSimd128(useRegisterAtStart(lhs), shiftCount);
   if (isThreeOpAllowed()) {
     define(lir, ins);
   } else {
     // For non-AVX, it is always beneficial to reuse the input.
     defineReuseInput(lir, ins, LWasmConstantShiftSimd128::SrcIndex);
   }
   return;
 }

#  ifdef DEBUG
 js::wasm::ReportSimdAnalysis("shift -> variable shift");
#  endif

 // These variable shifts need a scratch vector register.
 LDefinition tempReg = LDefinition::BogusTemp();
 switch (ins->simdOp()) {
   case wasm::SimdOp::I8x16Shl:
   case wasm::SimdOp::I8x16ShrS:
   case wasm::SimdOp::I8x16ShrU:
   case wasm::SimdOp::I64x2ShrS:
     tempReg = tempSimd128();
     break;
   default:
     break;
 }

 // Reusing the input if possible is never detrimental.
 LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
 LAllocation rhsAlloc = useRegisterAtStart(rhs);
 auto* lir =
     new (alloc()) LWasmVariableShiftSimd128(lhsDestAlloc, rhsAlloc, tempReg);
 defineReuseInput(lir, ins, LWasmVariableShiftSimd128::LhsIndex);
#else
 MOZ_CRASH("No SIMD");
#endif
}
   1401 
// Lower a SIMD shuffle.  The shuffle analysis (ins->shuffle()) has already
// classified the operation: single-operand shuffles become
// LWasmPermuteSimd128 on the relevant source, while genuine two-operand
// shuffles become LWasmShuffleSimd128, with BOTH_SWAPPED handled here by
// swapping which MIR operand feeds each LIR position.
void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
 MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
 MOZ_ASSERT(ins->rhs()->type() == MIRType::Simd128);
 MOZ_ASSERT(ins->type() == MIRType::Simd128);

 SimdShuffle s = ins->shuffle();
 switch (s.opd) {
   case SimdShuffle::Operand::LEFT:
   case SimdShuffle::Operand::RIGHT: {
     LAllocation src;
     // Whether the output must reuse the source register.
     bool reuse = false;
     switch (*s.permuteOp) {
       case SimdPermuteOp::MOVE:
         reuse = true;
         break;
       case SimdPermuteOp::BROADCAST_8x16:
       case SimdPermuteOp::BROADCAST_16x8:
       case SimdPermuteOp::PERMUTE_8x16:
       case SimdPermuteOp::PERMUTE_16x8:
       case SimdPermuteOp::PERMUTE_32x4:
       case SimdPermuteOp::ROTATE_RIGHT_8x16:
       case SimdPermuteOp::SHIFT_LEFT_8x16:
       case SimdPermuteOp::SHIFT_RIGHT_8x16:
       case SimdPermuteOp::REVERSE_16x8:
       case SimdPermuteOp::REVERSE_32x4:
       case SimdPermuteOp::REVERSE_64x2:
       case SimdPermuteOp::ZERO_EXTEND_8x16_TO_16x8:
       case SimdPermuteOp::ZERO_EXTEND_8x16_TO_32x4:
       case SimdPermuteOp::ZERO_EXTEND_8x16_TO_64x2:
       case SimdPermuteOp::ZERO_EXTEND_16x8_TO_32x4:
       case SimdPermuteOp::ZERO_EXTEND_16x8_TO_64x2:
       case SimdPermuteOp::ZERO_EXTEND_32x4_TO_64x2:
         // No need to reuse registers when VEX instructions are enabled.
         reuse = !Assembler::HasAVX();
         break;
       default:
         MOZ_CRASH("Unexpected operator");
     }
     if (s.opd == SimdShuffle::Operand::LEFT) {
       src = useRegisterAtStart(ins->lhs());
     } else {
       src = useRegisterAtStart(ins->rhs());
     }
     auto* lir =
         new (alloc()) LWasmPermuteSimd128(src, *s.permuteOp, s.control);
     if (reuse) {
       defineReuseInput(lir, ins, LWasmPermuteSimd128::SrcIndex);
     } else {
       define(lir, ins);
     }
     break;
   }
   case SimdShuffle::Operand::BOTH:
   case SimdShuffle::Operand::BOTH_SWAPPED: {
     // BLEND_8x16 needs a scratch; without AVX the scratch must be xmm0.
     LDefinition temp = LDefinition::BogusTemp();
     switch (*s.shuffleOp) {
       case SimdShuffleOp::BLEND_8x16:
         temp = Assembler::HasAVX() ? tempSimd128() : tempFixed(xmm0);
         break;
       default:
         break;
     }
     if (isThreeOpAllowed()) {
       LAllocation lhs;
       LAllocation rhs;
       if (s.opd == SimdShuffle::Operand::BOTH) {
         lhs = useRegisterAtStart(ins->lhs());
         rhs = useRegisterAtStart(ins->rhs());
       } else {
         lhs = useRegisterAtStart(ins->rhs());
         rhs = useRegisterAtStart(ins->lhs());
       }
       auto* lir = new (alloc())
           LWasmShuffleSimd128(lhs, rhs, temp, *s.shuffleOp, s.control);
       define(lir, ins);
     } else {
       LAllocation lhs;
       LAllocation rhs;
       if (s.opd == SimdShuffle::Operand::BOTH) {
         lhs = useRegisterAtStart(ins->lhs());
         rhs = useRegister(ins->rhs());
       } else {
         lhs = useRegisterAtStart(ins->rhs());
         rhs = useRegister(ins->lhs());
       }
       auto* lir = new (alloc())
           LWasmShuffleSimd128(lhs, rhs, temp, *s.shuffleOp, s.control);
       defineReuseInput(lir, ins, LWasmShuffleSimd128::LhsIndex);
     }
     break;
   }
 }
#else
 MOZ_CRASH("No SIMD");
#endif
}
   1499 
// Lower a SIMD replace-lane.  Int64 lane values need the 64-bit register
// use; all other lane types use the plain variant.  In both cases the
// AVX/non-AVX split below decides whether the output reuses the lhs.
void LIRGenerator::visitWasmReplaceLaneSimd128(MWasmReplaceLaneSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
 MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
 MOZ_ASSERT(ins->type() == MIRType::Simd128);

 // If AVX support is disabled, the Masm API is (rhs, lhsDest) and requires
 // AtStart+ReuseInput for the lhs. For type reasons, the rhs will never be
 // the same as the lhs and is therefore a plain Use.
 //
 // If AVX support is enabled, useRegisterAtStart is preferred.

 if (ins->rhs()->type() == MIRType::Int64) {
   if (isThreeOpAllowed()) {
     auto* lir = new (alloc()) LWasmReplaceInt64LaneSimd128(
         useRegisterAtStart(ins->lhs()), useInt64RegisterAtStart(ins->rhs()));
     define(lir, ins);
   } else {
     auto* lir = new (alloc()) LWasmReplaceInt64LaneSimd128(
         useRegisterAtStart(ins->lhs()), useInt64Register(ins->rhs()));
     defineReuseInput(lir, ins, LWasmReplaceInt64LaneSimd128::LhsIndex);
   }
 } else {
   if (isThreeOpAllowed()) {
     auto* lir = new (alloc()) LWasmReplaceLaneSimd128(
         useRegisterAtStart(ins->lhs()), useRegisterAtStart(ins->rhs()));
     define(lir, ins);
   } else {
     auto* lir = new (alloc()) LWasmReplaceLaneSimd128(
         useRegisterAtStart(ins->lhs()), useRegister(ins->rhs()));
     defineReuseInput(lir, ins, LWasmReplaceLaneSimd128::LhsIndex);
   }
 }
#else
 MOZ_CRASH("No SIMD");
#endif
}
   1536 
   1537 void LIRGenerator::visitWasmScalarToSimd128(MWasmScalarToSimd128* ins) {
   1538 #ifdef ENABLE_WASM_SIMD
   1539  MOZ_ASSERT(ins->type() == MIRType::Simd128);
   1540 
   1541  switch (ins->input()->type()) {
   1542    case MIRType::Int64: {
   1543      // 64-bit integer splats.
   1544      // Load-and-(sign|zero)extend.
   1545      auto* lir = new (alloc())
   1546          LWasmInt64ToSimd128(useInt64RegisterAtStart(ins->input()));
   1547      define(lir, ins);
   1548      break;
   1549    }
   1550    case MIRType::Float32:
   1551    case MIRType::Double: {
   1552      // Floating-point splats.
   1553      // Ideally we save a move on SSE systems by reusing the input register,
   1554      // but since the input and output register types differ, we can't.
   1555      auto* lir =
   1556          new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input()));
   1557      define(lir, ins);
   1558      break;
   1559    }
   1560    default: {
   1561      // 32-bit integer splats.
   1562      auto* lir =
   1563          new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input()));
   1564      define(lir, ins);
   1565      break;
   1566    }
   1567  }
   1568 #else
   1569  MOZ_CRASH("No SIMD");
   1570 #endif
   1571 }
   1572 
void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  MOZ_ASSERT(ins->input()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  // Classify the op to pick a register-allocation policy:
  //  - useAtStart: the input's register may be shared with the output;
  //  - reuseInput: the output must be allocated to the input's register
  //    (used when AVX three-operand forms are not available);
  //  - tempReg:    a scratch SIMD register for ops whose codegen needs one.
  bool useAtStart = false;
  bool reuseInput = false;
  LDefinition tempReg = LDefinition::BogusTemp();
  switch (ins->simdOp()) {
    case wasm::SimdOp::I8x16Neg:
    case wasm::SimdOp::I16x8Neg:
    case wasm::SimdOp::I32x4Neg:
    case wasm::SimdOp::I64x2Neg:
    case wasm::SimdOp::I16x8ExtaddPairwiseI8x16S:
      // Prefer src != dest to avoid an unconditional src->temp move.
      MOZ_ASSERT(!reuseInput);
      // If AVX is enabled, we prefer useRegisterAtStart.
      useAtStart = isThreeOpAllowed();
      break;
    case wasm::SimdOp::F32x4Neg:
    case wasm::SimdOp::F64x2Neg:
    case wasm::SimdOp::F32x4Abs:
    case wasm::SimdOp::F64x2Abs:
    case wasm::SimdOp::V128Not:
    case wasm::SimdOp::F32x4Sqrt:
    case wasm::SimdOp::F64x2Sqrt:
    case wasm::SimdOp::I8x16Abs:
    case wasm::SimdOp::I16x8Abs:
    case wasm::SimdOp::I32x4Abs:
    case wasm::SimdOp::I64x2Abs:
    case wasm::SimdOp::I32x4TruncSatF32x4S:
    case wasm::SimdOp::F32x4ConvertI32x4U:
    case wasm::SimdOp::I16x8ExtaddPairwiseI8x16U:
    case wasm::SimdOp::I32x4ExtaddPairwiseI16x8S:
    case wasm::SimdOp::I32x4ExtaddPairwiseI16x8U:
    case wasm::SimdOp::I32x4RelaxedTruncF32x4S:
    case wasm::SimdOp::I32x4RelaxedTruncF32x4U:
    case wasm::SimdOp::I32x4RelaxedTruncF64x2SZero:
    case wasm::SimdOp::I32x4RelaxedTruncF64x2UZero:
    case wasm::SimdOp::I64x2ExtendHighI32x4S:
    case wasm::SimdOp::I64x2ExtendHighI32x4U:
      // Prefer src == dest to avoid an unconditional src->dest move
      // for better performance in non-AVX mode (e.g. non-PSHUFD use).
      useAtStart = true;
      reuseInput = !isThreeOpAllowed();
      break;
    case wasm::SimdOp::I32x4TruncSatF32x4U:
    case wasm::SimdOp::I32x4TruncSatF64x2SZero:
    case wasm::SimdOp::I32x4TruncSatF64x2UZero:
    case wasm::SimdOp::I8x16Popcnt:
      // These ops need a scratch SIMD register during codegen.
      tempReg = tempSimd128();
      // Prefer src == dest to avoid an unconditional src->dest move
      // in non-AVX mode.
      useAtStart = true;
      reuseInput = !isThreeOpAllowed();
      break;
    case wasm::SimdOp::I16x8ExtendLowI8x16S:
    case wasm::SimdOp::I16x8ExtendHighI8x16S:
    case wasm::SimdOp::I16x8ExtendLowI8x16U:
    case wasm::SimdOp::I16x8ExtendHighI8x16U:
    case wasm::SimdOp::I32x4ExtendLowI16x8S:
    case wasm::SimdOp::I32x4ExtendHighI16x8S:
    case wasm::SimdOp::I32x4ExtendLowI16x8U:
    case wasm::SimdOp::I32x4ExtendHighI16x8U:
    case wasm::SimdOp::I64x2ExtendLowI32x4S:
    case wasm::SimdOp::I64x2ExtendLowI32x4U:
    case wasm::SimdOp::F32x4ConvertI32x4S:
    case wasm::SimdOp::F32x4Ceil:
    case wasm::SimdOp::F32x4Floor:
    case wasm::SimdOp::F32x4Trunc:
    case wasm::SimdOp::F32x4Nearest:
    case wasm::SimdOp::F64x2Ceil:
    case wasm::SimdOp::F64x2Floor:
    case wasm::SimdOp::F64x2Trunc:
    case wasm::SimdOp::F64x2Nearest:
    case wasm::SimdOp::F32x4DemoteF64x2Zero:
    case wasm::SimdOp::F64x2PromoteLowF32x4:
    case wasm::SimdOp::F64x2ConvertLowI32x4S:
    case wasm::SimdOp::F64x2ConvertLowI32x4U:
      // Prefer src == dest to exert the lowest register pressure on the
      // surrounding code.
      useAtStart = true;
      MOZ_ASSERT(!reuseInput);
      break;
    default:
      MOZ_CRASH("Unary SimdOp not implemented");
  }

  LUse inputUse =
      useAtStart ? useRegisterAtStart(ins->input()) : useRegister(ins->input());
  LWasmUnarySimd128* lir = new (alloc()) LWasmUnarySimd128(inputUse, tempReg);
  if (reuseInput) {
    defineReuseInput(lir, ins, LWasmUnarySimd128::SrcIndex);
  } else {
    define(lir, ins);
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}
   1673 
   1674 void LIRGenerator::visitWasmLoadLaneSimd128(MWasmLoadLaneSimd128* ins) {
   1675 #ifdef ENABLE_WASM_SIMD
   1676  // A trick: On 32-bit systems, the base pointer is 32 bits (it was bounds
   1677  // checked and then chopped).  On 64-bit systems, it can be 32 bits or 64
   1678  // bits.  Either way, it fits in a GPR so we can ignore the
   1679  // Register/Register64 distinction here.
   1680 #  ifndef JS_64BIT
   1681  MOZ_ASSERT(ins->base()->type() == MIRType::Int32);
   1682 #  endif
   1683  LUse base = useRegisterAtStart(ins->base());
   1684  LUse inputUse = useRegisterAtStart(ins->value());
   1685  LAllocation memoryBase = ins->hasMemoryBase()
   1686                               ? useRegisterAtStart(ins->memoryBase())
   1687                               : LAllocation();
   1688  auto* lir = new (alloc()) LWasmLoadLaneSimd128(base, inputUse, memoryBase);
   1689  defineReuseInput(lir, ins, LWasmLoadLaneSimd128::SrcIndex);
   1690 #else
   1691  MOZ_CRASH("No SIMD");
   1692 #endif
   1693 }
   1694 
   1695 void LIRGenerator::visitWasmStoreLaneSimd128(MWasmStoreLaneSimd128* ins) {
   1696 #ifdef ENABLE_WASM_SIMD
   1697  // See comment above.
   1698 #  ifndef JS_64BIT
   1699  MOZ_ASSERT(ins->base()->type() == MIRType::Int32);
   1700 #  endif
   1701  LUse base = useRegisterAtStart(ins->base());
   1702  LUse input = useRegisterAtStart(ins->value());
   1703  LAllocation memoryBase = ins->hasMemoryBase()
   1704                               ? useRegisterAtStart(ins->memoryBase())
   1705                               : LAllocation();
   1706  auto* lir = new (alloc()) LWasmStoreLaneSimd128(base, input, memoryBase);
   1707  add(lir, ins);
   1708 #else
   1709  MOZ_CRASH("No SIMD");
   1710 #endif
   1711 }
   1712 
   1713 #ifdef ENABLE_WASM_SIMD
   1714 
   1715 bool LIRGeneratorX86Shared::canFoldReduceSimd128AndBranch(wasm::SimdOp op) {
   1716  switch (op) {
   1717    case wasm::SimdOp::V128AnyTrue:
   1718    case wasm::SimdOp::I8x16AllTrue:
   1719    case wasm::SimdOp::I16x8AllTrue:
   1720    case wasm::SimdOp::I32x4AllTrue:
   1721    case wasm::SimdOp::I64x2AllTrue:
   1722    case wasm::SimdOp::I16x8Bitmask:
   1723      return true;
   1724    default:
   1725      return false;
   1726  }
   1727 }
   1728 
   1729 bool LIRGeneratorX86Shared::canEmitWasmReduceSimd128AtUses(
   1730    MWasmReduceSimd128* ins) {
   1731  if (!ins->canEmitAtUses()) {
   1732    return false;
   1733  }
   1734  // Only specific ops generating int32.
   1735  if (ins->type() != MIRType::Int32) {
   1736    return false;
   1737  }
   1738  if (!canFoldReduceSimd128AndBranch(ins->simdOp())) {
   1739    return false;
   1740  }
   1741  // If never used then defer (it will be removed).
   1742  MUseIterator iter(ins->usesBegin());
   1743  if (iter == ins->usesEnd()) {
   1744    return true;
   1745  }
   1746  // We require an MTest consumer.
   1747  MNode* node = iter->consumer();
   1748  if (!node->isDefinition() || !node->toDefinition()->isTest()) {
   1749    return false;
   1750  }
   1751  // Defer only if there's only one use.
   1752  iter++;
   1753  return iter == ins->usesEnd();
   1754 }
   1755 
   1756 #endif  // ENABLE_WASM_SIMD
   1757 
   1758 void LIRGenerator::visitWasmReduceSimd128(MWasmReduceSimd128* ins) {
   1759 #ifdef ENABLE_WASM_SIMD
   1760  if (canEmitWasmReduceSimd128AtUses(ins)) {
   1761    emitAtUses(ins);
   1762    return;
   1763  }
   1764 
   1765  // Reductions (any_true, all_true, bitmask, extract_lane) uniformly prefer
   1766  // useRegisterAtStart:
   1767  //
   1768  // - In most cases, the input type differs from the output type, so there's no
   1769  //   conflict and it doesn't really matter.
   1770  //
   1771  // - For extract_lane(0) on F32x4 and F64x2, input == output results in zero
   1772  //   code being generated.
   1773  //
   1774  // - For extract_lane(k > 0) on F32x4 and F64x2, allowing the input register
   1775  //   to be targeted lowers register pressure if it's the last use of the
   1776  //   input.
   1777 
   1778  if (ins->type() == MIRType::Int64) {
   1779    auto* lir = new (alloc())
   1780        LWasmReduceSimd128ToInt64(useRegisterAtStart(ins->input()));
   1781    defineInt64(lir, ins);
   1782  } else {
   1783    // Ideally we would reuse the input register for floating extract_lane if
   1784    // the lane is zero, but constraints in the register allocator require the
   1785    // input and output register types to be the same.
   1786    auto* lir =
   1787        new (alloc()) LWasmReduceSimd128(useRegisterAtStart(ins->input()));
   1788    define(lir, ins);
   1789  }
   1790 #else
   1791  MOZ_CRASH("No SIMD");
   1792 #endif
   1793 }