tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

MacroAssembler-x86-shared.h (40272B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #ifndef jit_x86_shared_MacroAssembler_x86_shared_h
      8 #define jit_x86_shared_MacroAssembler_x86_shared_h
      9 
     10 #if defined(JS_CODEGEN_X86)
     11 #  include "jit/x86/Assembler-x86.h"
     12 #elif defined(JS_CODEGEN_X64)
     13 #  include "jit/x64/Assembler-x64.h"
     14 #endif
     15 
     16 using js::wasm::FaultingCodeOffset;
     17 
     18 namespace js {
     19 namespace jit {
     20 
     21 class MacroAssembler;
     22 
     23 class MacroAssemblerX86Shared : public Assembler {
     24 private:
     25  // Perform a downcast. Should be removed by Bug 996602.
     26  MacroAssembler& asMasm();
     27  const MacroAssembler& asMasm() const;
     28 
     29 public:
#ifdef JS_CODEGEN_X64
  // On x64 a constant-pool reference is recorded as a jump source.
  using UsesItem = X86Encoding::JmpSrc;
#else
  // On x86 a plain code offset suffices.
  using UsesItem = CodeOffset;
#endif

  using UsesVector = Vector<UsesItem, 0, SystemAllocPolicy>;
  // Keep UsesItem compact: these vectors can accumulate many entries.
  static_assert(sizeof(UsesItem) == 4);
     38 
     39 protected:
  // A constant-pool entry of POD type T (double, float or SimdConstant)
  // together with the code locations that reference it (presumably patched
  // to the constant's final address at finalization -- see getConstant).
  template <class T>
  struct Constant {
    using Pod = T;

    // The constant's value.
    T value;
    // Code locations referring to this constant.
    UsesVector uses;

    explicit Constant(const T& value) : value(value) {}

    // Allow move operations, but not copying.
    Constant(Constant<T>&&) = default;
    Constant(const Constant<T>&) = delete;
  };
     53 
  // Containers use SystemAllocPolicy since wasm releases memory after each
  // function is compiled, and these need to live until after all functions
  // are compiled.
  using Double = Constant<double>;
  // Pool of double constants, plus a value -> pool-index map (presumably
  // for deduplication; see getConstant).
  Vector<Double, 0, SystemAllocPolicy> doubles_;
  using DoubleMap =
      HashMap<double, size_t, DefaultHasher<double>, SystemAllocPolicy>;
  DoubleMap doubleMap_;

  using Float = Constant<float>;
  // Pool of float constants, plus its value -> pool-index map.
  Vector<Float, 0, SystemAllocPolicy> floats_;
  using FloatMap =
      HashMap<float, size_t, DefaultHasher<float>, SystemAllocPolicy>;
  FloatMap floatMap_;
     68 
     69  struct SimdData : public Constant<SimdConstant> {
     70    explicit SimdData(SimdConstant d) : Constant<SimdConstant>(d) {}
     71    SimdData(SimdData&& d) : Constant<SimdConstant>(std::move(d)) {}
     72    explicit SimdData(const SimdData&) = delete;
     73    SimdConstant::Type type() const { return value.type(); }
     74  };
     75 
  // Pool of SIMD constants and its lookup map (SimdConstant serves as its
  // own hasher here).
  Vector<SimdData, 0, SystemAllocPolicy> simds_;
  using SimdMap =
      HashMap<SimdConstant, size_t, SimdConstant, SystemAllocPolicy>;
  SimdMap simdMap_;

  // Shared lookup for the pools above (presumably find-or-insert; defined
  // out of line).
  template <class T, class Map>
  T* getConstant(const typename T::Pod& value, Map& map,
                 Vector<T, 0, SystemAllocPolicy>& vec);

  // Per-pool convenience wrappers over getConstant.
  Float* getFloat(float f);
  Double* getDouble(double d);
  SimdData* getSimdData(const SimdConstant& v);
     88 
 public:
  // Expose Assembler's call() overloads alongside those defined here.
  using Assembler::call;

  MacroAssemblerX86Shared() = default;

  // Append raw machine-code bytes to the assembler buffer; returns whether
  // the append succeeded.
  bool appendRawCode(const uint8_t* code, size_t numBytes) {
    return masm.appendRawCode(code, numBytes);
  }

  // Add |bias| to the 32-bit PC-relative value at buffer |offset|.
  void addToPCRel4(uint32_t offset, int32_t bias) {
    return masm.addToPCRel4(offset, bias);
  }
    101 
  // Evaluate srcDest = minmax<isMax>{Float32,Double}(srcDest, second).
  // Checks for NaN if canBeNaN is true. Defined out of line.
  void minMaxDouble(FloatRegister srcDest, FloatRegister second, bool canBeNaN,
                    bool isMax);
  void minMaxFloat32(FloatRegister srcDest, FloatRegister second, bool canBeNaN,
                     bool isMax);
    108 
    109  void compareDouble(DoubleCondition cond, FloatRegister lhs,
    110                     FloatRegister rhs) {
    111    if (cond & DoubleConditionBitInvert) {
    112      vucomisd(lhs, rhs);
    113    } else {
    114      vucomisd(rhs, lhs);
    115    }
    116  }
    117 
    118  void compareFloat(DoubleCondition cond, FloatRegister lhs,
    119                    FloatRegister rhs) {
    120    if (cond & DoubleConditionBitInvert) {
    121      vucomiss(lhs, rhs);
    122    } else {
    123      vucomiss(rhs, lhs);
    124    }
    125  }
    126 
  // Out-of-line helpers that branch on a negative-zero test of |reg|,
  // using |scratch| as a temporary.
  void branchNegativeZero(FloatRegister reg, Register scratch, Label* label,
                          bool maybeNonZero = true);
  void branchNegativeZeroFloat32(FloatRegister reg, Register scratch,
                                 Label* label);
    131 
  // 32-bit moves, tests and compares. Note the operand swap when forwarding:
  // the underlying testl/cmpl primitives take (rhs, lhs).
  void move32(Imm32 imm, Register dest) {
    // Use the ImmWord version of mov to register, which has special
    // optimizations. Casting to uint32_t here ensures that the value
    // is zero-extended.
    mov(ImmWord(uint32_t(imm.value)), dest);
  }
  void move32(Imm32 imm, const Operand& dest) { movl(imm, dest); }
  void move32(Register src, Register dest) { movl(src, dest); }
  void move32(Register src, const Operand& dest) { movl(src, dest); }
  void test32(Register lhs, Register rhs) { testl(rhs, lhs); }
  void test32(const Address& addr, Imm32 imm) { testl(imm, Operand(addr)); }
  void test32(const Operand lhs, Imm32 imm) { testl(imm, lhs); }
  void test32(Register lhs, Imm32 rhs) { testl(rhs, lhs); }
  void cmp32(Register lhs, Imm32 rhs) { cmpl(rhs, lhs); }
  void cmp32(Register lhs, Register rhs) { cmpl(rhs, lhs); }
  // Address forms route through the Operand overloads below.
  void cmp32(const Address& lhs, Register rhs) { cmp32(Operand(lhs), rhs); }
  void cmp32(const Address& lhs, Imm32 rhs) { cmp32(Operand(lhs), rhs); }
  void cmp32(const Operand& lhs, Imm32 rhs) { cmpl(rhs, lhs); }
  void cmp32(const Operand& lhs, Register rhs) { cmpl(rhs, lhs); }
  void cmp32(Register lhs, const Operand& rhs) { cmpl(rhs, lhs); }
    152 
  // 16- and 8-bit compares (same (rhs, lhs) operand swap as above).
  void cmp16(const Address& lhs, Imm32 rhs) { cmp16(Operand(lhs), rhs); }
  void cmp16(const Operand& lhs, Imm32 rhs) { cmpw(rhs, lhs); }

  void cmp8(const Address& lhs, Imm32 rhs) { cmp8(Operand(lhs), rhs); }
  void cmp8(const Operand& lhs, Imm32 rhs) { cmpb(rhs, lhs); }
  void cmp8(const Operand& lhs, Register rhs) { cmpb(rhs, lhs); }

  // Atomic 32-bit increment/decrement of a memory location (lock-prefixed).
  void atomic_inc32(const Operand& addr) { lock_incl(addr); }
  void atomic_dec32(const Operand& addr) { lock_decl(addr); }
    162 
  // Compare-and-branch / test-and-branch on 16-bit register operands.
  void branch16(Condition cond, Register lhs, Register rhs, Label* label) {
    cmpw(rhs, lhs);
    j(cond, label);
  }
  void branchTest16(Condition cond, Register lhs, Register rhs, Label* label) {
    testw(rhs, lhs);
    j(cond, label);
  }

  // Unconditional jumps to the various target kinds.
  void jump(Label* label) { jmp(label); }
  void jump(JitCode* code) { jmp(code); }
  void jump(TrampolinePtr code) { jmp(ImmPtr(code.value)); }
  void jump(ImmPtr ptr) { jmp(ptr); }
  // Indirect jumps through a register or memory operand.
  void jump(Register reg) { jmp(Operand(reg)); }
  void jump(const Address& addr) { jmp(Operand(addr)); }
    178 
  void convertInt32ToDouble(Register src, FloatRegister dest) {
    // vcvtsi2sd and friends write only part of their output register, which
    // causes slowdowns on out-of-order processors. Explicitly break
    // dependencies with vxorpd (and vxorps elsewhere), which are handled
    // specially in modern CPUs, for this purpose. See sections 8.14, 9.8,
    // 10.8, 12.9, 13.16, 14.14, and 15.8 of Agner's Microarchitecture
    // document.
    zeroDouble(dest);
    vcvtsi2sd(src, dest, dest);
  }
  // Memory-operand forms: forward to the Operand overload.
  void convertInt32ToDouble(const Address& src, FloatRegister dest) {
    convertInt32ToDouble(Operand(src), dest);
  }
  void convertInt32ToDouble(const BaseIndex& src, FloatRegister dest) {
    convertInt32ToDouble(Operand(src), dest);
  }
    195  void convertInt32ToDouble(const Operand& src, FloatRegister dest) {
    196    // Clear the output register first to break dependencies; see above;
    197    zeroDouble(dest);
    198    vcvtsi2sd(Operand(src), dest, dest);
    199  }
  void convertInt32ToFloat32(Register src, FloatRegister dest) {
    // Clear the output register first to break dependencies; see above.
    zeroFloat32(dest);
    vcvtsi2ss(src, dest, dest);
  }
  // Memory forms of the conversion.
  void convertInt32ToFloat32(const Address& src, FloatRegister dest) {
    convertInt32ToFloat32(Operand(src), dest);
  }
  void convertInt32ToFloat32(const Operand& src, FloatRegister dest) {
    // Clear the output register first to break dependencies; see above.
    zeroFloat32(dest);
    vcvtsi2ss(src, dest, dest);
  }
    213  Condition testDoubleTruthy(bool truthy, FloatRegister reg) {
    214    ScratchDoubleScope scratch(asMasm());
    215    zeroDouble(scratch);
    216    vucomisd(reg, scratch);
    217    return truthy ? NonZero : Zero;
    218  }
    219 
  // Class which ensures that registers used in byte ops are compatible with
  // such instructions, even if the original register passed in wasn't. This
  // only applies to x86, as on x64 all registers are valid single byte regs.
  // This doesn't lead to great code but helps to simplify code generation.
  //
  // Note that this can currently only be used in cases where the register is
  // read from by the guarded instruction, not written to.
  class AutoEnsureByteRegister {
    MacroAssemblerX86Shared* masm;
    Register original_;
    Register substitute_;

   public:
    template <typename T>
    AutoEnsureByteRegister(MacroAssemblerX86Shared* masm, T address,
                           Register reg)
        : masm(masm), original_(reg) {
      AllocatableGeneralRegisterSet singleByteRegs(Registers::SingleByteRegs);
      if (singleByteRegs.has(reg)) {
        // |reg| is already byte-addressable; use it as-is.
        substitute_ = reg;
      } else {
        // We push the substitute below, so the guarded access must not be
        // relative to the (now shifted) stack pointer.
        MOZ_ASSERT(address.base != StackPointer);
        // Pick a byte register that does not participate in the address
        // computation of the guarded instruction.
        do {
          substitute_ = singleByteRegs.takeAny();
        } while (Operand(address).containsReg(substitute_));

        // Save the substitute's previous value, then copy |reg| into it.
        masm->push(substitute_);
        masm->mov(reg, substitute_);
      }
    }

    ~AutoEnsureByteRegister() {
      // Restore the substitute register if one was clobbered above.
      if (original_ != substitute_) {
        masm->pop(substitute_);
      }
    }

    // The byte-addressable register to use in place of the original.
    Register reg() { return substitute_; }
  };
    259 
  // Byte loads with zero/sign extension. The Address/BaseIndex forms return
  // the code offset of the memory access itself (used by wasm fault
  // handling; see js::wasm::FaultingCodeOffset).
  void load8ZeroExtend(const Operand& src, Register dest) { movzbl(src, dest); }
  FaultingCodeOffset load8ZeroExtend(const Address& src, Register dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movzbl(Operand(src), dest);
    return fco;
  }
  FaultingCodeOffset load8ZeroExtend(const BaseIndex& src, Register dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movzbl(Operand(src), dest);
    return fco;
  }
  void load8SignExtend(const Operand& src, Register dest) { movsbl(src, dest); }
  FaultingCodeOffset load8SignExtend(const Address& src, Register dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movsbl(Operand(src), dest);
    return fco;
  }
  FaultingCodeOffset load8SignExtend(const BaseIndex& src, Register dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movsbl(Operand(src), dest);
    return fco;
  }
  // Byte stores. An immediate source needs no register fixup.
  template <typename T>
  void store8(Imm32 src, const T& dest) {
    movb(src, Operand(dest));
  }
  template <typename T>
  FaultingCodeOffset store8(Register src, const T& dest) {
    // On x86 not every register is byte-addressable; substitute one that is.
    AutoEnsureByteRegister ensure(this, dest, src);
    // We must read the current offset only after AutoEnsureByteRegister's
    // constructor has done its thing, since it may insert instructions, and
    // we want to get the offset for the `movb` itself.
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movb(ensure.reg(), Operand(dest));
    return fco;
  }
  // 16-bit loads/stores with extension; the Address/BaseIndex forms return
  // the offset of the memory access (see FaultingCodeOffset above).
  void load16ZeroExtend(const Operand& src, Register dest) {
    movzwl(src, dest);
  }
  FaultingCodeOffset load16ZeroExtend(const Address& src, Register dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movzwl(Operand(src), dest);
    return fco;
  }
  FaultingCodeOffset load16ZeroExtend(const BaseIndex& src, Register dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movzwl(Operand(src), dest);
    return fco;
  }
  // x86 integer accesses tolerate unaligned addresses, so the "unaligned"
  // variants simply forward to the regular ones.
  template <typename S>
  void load16UnalignedZeroExtend(const S& src, Register dest) {
    load16ZeroExtend(src, dest);
  }
  template <typename S, typename T>
  FaultingCodeOffset store16(const S& src, const T& dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movw(src, Operand(dest));
    return fco;
  }
  template <typename S, typename T>
  void store16Unaligned(const S& src, const T& dest) {
    store16(src, dest);
  }
  void load16SignExtend(const Operand& src, Register dest) {
    movswl(src, dest);
  }
  FaultingCodeOffset load16SignExtend(const Address& src, Register dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movswl(Operand(src), dest);
    return fco;
  }
  FaultingCodeOffset load16SignExtend(const BaseIndex& src, Register dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movswl(Operand(src), dest);
    return fco;
  }
  template <typename S>
  void load16UnalignedSignExtend(const S& src, Register dest) {
    load16SignExtend(src, dest);
  }
  // 32-bit loads/stores; memory-addressed forms return the access offset.
  FaultingCodeOffset load32(const Address& address, Register dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movl(Operand(address), dest);
    return fco;
  }
  FaultingCodeOffset load32(const BaseIndex& src, Register dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movl(Operand(src), dest);
    return fco;
  }
  void load32(const Operand& src, Register dest) { movl(src, dest); }
  // Unaligned forms forward directly (x86 handles unaligned accesses).
  template <typename S>
  void load32Unaligned(const S& src, Register dest) {
    load32(src, dest);
  }
  template <typename S, typename T>
  FaultingCodeOffset store32(const S& src, const T& dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movl(src, Operand(dest));
    return fco;
  }
  template <typename S, typename T>
  void store32Unaligned(const S& src, const T& dest) {
    store32(src, dest);
  }
  // Scalar double loads; return the offset of the memory access.
  FaultingCodeOffset loadDouble(const Address& src, FloatRegister dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    vmovsd(src, dest);
    return fco;
  }
  FaultingCodeOffset loadDouble(const BaseIndex& src, FloatRegister dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    vmovsd(src, dest);
    return fco;
  }
    375  void loadDouble(const Operand& src, FloatRegister dest) {
    376    switch (src.kind()) {
    377      case Operand::MEM_REG_DISP:
    378        loadDouble(src.toAddress(), dest);
    379        break;
    380      case Operand::MEM_SCALE:
    381        loadDouble(src.toBaseIndex(), dest);
    382        break;
    383      default:
    384        MOZ_CRASH("unexpected operand kind");
    385    }
    386  }
  void moveDouble(FloatRegister src, FloatRegister dest) {
    // Use vmovapd instead of vmovsd to avoid dependencies.
    vmovapd(src, dest);
  }
  // Zero a register with xor-on-self; also breaks dependency chains
  // (see the note in convertInt32ToDouble above).
  void zeroDouble(FloatRegister reg) { vxorpd(reg, reg, reg); }
  void zeroFloat32(FloatRegister reg) { vxorps(reg, reg, reg); }
  // Scalar float <-> double conversions.
  void convertFloat32ToDouble(FloatRegister src, FloatRegister dest) {
    vcvtss2sd(src, dest, dest);
  }
  void convertDoubleToFloat32(FloatRegister src, FloatRegister dest) {
    vcvtsd2ss(src, dest, dest);
  }
    399 
  // Float16 conversions. There is no direct double -> f16 path here.
  void convertDoubleToFloat16(FloatRegister src, FloatRegister dest) {
    MOZ_CRASH("Not supported for this target");
  }
  void convertFloat16ToDouble(FloatRegister src, FloatRegister dest) {
    // Widen in two steps: f16 -> f32 -> f64.
    convertFloat16ToFloat32(src, dest);
    convertFloat32ToDouble(dest, dest);
  }
  void convertFloat32ToFloat16(FloatRegister src, FloatRegister dest) {
    // Narrow via vcvtps2ph (requires F16C support -- presumably checked by
    // the caller/feature detection; TODO confirm).
    vcvtps2ph(src, dest);
  }
  void convertFloat16ToFloat32(FloatRegister src, FloatRegister dest) {
    // Zero extend word to quadword. This ensures all high words in the result
    // are zeroed after vcvtph2ps.
    vpmovzxwq(Operand(src), dest);

    // Convert Float16 to Float32.
    vcvtph2ps(dest, dest);
  }
  void convertInt32ToFloat16(Register src, FloatRegister dest) {
    // Clear the output register first to break dependencies; see above.
    //
    // This also ensures all high words in the result are zeroed.
    zeroFloat32(dest);

    // Convert Int32 to Float32.
    vcvtsi2ss(src, dest, dest);

    // Convert Float32 to Float16.
    vcvtps2ph(dest, dest);
  }
    430 
  // Aligned 128-bit SIMD loads/stores (the address must be 16-byte aligned
  // for the aligned mov forms used here).
  void loadInt32x4(const Address& addr, FloatRegister dest) {
    vmovdqa(Operand(addr), dest);
  }
  void loadFloat32x4(const Address& addr, FloatRegister dest) {
    vmovaps(Operand(addr), dest);
  }
  void storeInt32x4(FloatRegister src, const Address& addr) {
    vmovdqa(src, Operand(addr));
  }
  void storeFloat32x4(FloatRegister src, const Address& addr) {
    vmovaps(src, Operand(addr));
  }
    443 
  void convertFloat32x4ToInt32x4(FloatRegister src, FloatRegister dest) {
    // Note that if the conversion failed (because the converted
    // result is larger than the maximum signed int32, or less than the
    // least signed int32, or NaN), this will return the undefined integer
    // value (0x80000000).
    vcvttps2dq(src, dest);
  }
  void convertInt32x4ToFloat32x4(FloatRegister src, FloatRegister dest) {
    vcvtdq2ps(src, dest);
  }
    454 
  // Emit a binary SIMD op whose rhs is a SimdConstant, taking both the
  // register/memory form (|regOp|) and the constant form (|constOp|) as
  // member-function pointers. Defined out of line.
  void binarySimd128(const SimdConstant& rhs, FloatRegister lhsDest,
                     void (MacroAssembler::*regOp)(const Operand&,
                                                   FloatRegister,
                                                   FloatRegister),
                     void (MacroAssembler::*constOp)(const SimdConstant&,
                                                     FloatRegister));
  void binarySimd128(
      FloatRegister lhs, const SimdConstant& rhs, FloatRegister dest,
      void (MacroAssembler::*regOp)(const Operand&, FloatRegister,
                                    FloatRegister),
      void (MacroAssembler::*constOp)(const SimdConstant&, FloatRegister,
                                      FloatRegister));
  void binarySimd128(const SimdConstant& rhs, FloatRegister lhsDest,
                     void (MacroAssembler::*regOp)(const Operand&,
                                                   FloatRegister),
                     void (MacroAssembler::*constOp)(const SimdConstant&,
                                                     FloatRegister));
    472 
  // SIMD methods, defined in MacroAssembler-x86-shared-SIMD.cpp.

  // Unsigned int -> float conversions and bitwise tests.
  void unsignedConvertInt32x4ToFloat32x4(FloatRegister src, FloatRegister dest);
  void unsignedConvertInt32x4ToFloat64x2(FloatRegister src, FloatRegister dest);
  void bitwiseTestSimd128(const SimdConstant& rhs, FloatRegister lhs);

  // Saturating (and relaxed) float -> int truncations.
  void truncSatFloat32x4ToInt32x4(FloatRegister src, FloatRegister dest);
  void unsignedTruncSatFloat32x4ToInt32x4(FloatRegister src, FloatRegister temp,
                                          FloatRegister dest);
  void unsignedTruncFloat32x4ToInt32x4Relaxed(FloatRegister src,
                                              FloatRegister dest);
  void truncSatFloat64x2ToInt32x4(FloatRegister src, FloatRegister temp,
                                  FloatRegister dest);
  void unsignedTruncSatFloat64x2ToInt32x4(FloatRegister src, FloatRegister temp,
                                          FloatRegister dest);
  void unsignedTruncFloat64x2ToInt32x4Relaxed(FloatRegister src,
                                              FloatRegister dest);

  // Broadcast a scalar into every lane.
  void splatX16(Register input, FloatRegister output);
  void splatX8(Register input, FloatRegister output);
  void splatX4(Register input, FloatRegister output);
  void splatX4(FloatRegister input, FloatRegister output);
  void splatX2(FloatRegister input, FloatRegister output);

  // Lane extraction and replacement.
  void extractLaneInt32x4(FloatRegister input, Register output, unsigned lane);
  void extractLaneFloat32x4(FloatRegister input, FloatRegister output,
                            unsigned lane);
  void extractLaneFloat64x2(FloatRegister input, FloatRegister output,
                            unsigned lane);
  void extractLaneInt16x8(FloatRegister input, Register output, unsigned lane,
                          SimdSign sign);
  void extractLaneInt8x16(FloatRegister input, Register output, unsigned lane,
                          SimdSign sign);

  void replaceLaneFloat32x4(unsigned lane, FloatRegister lhs, FloatRegister rhs,
                            FloatRegister dest);
  void replaceLaneFloat64x2(unsigned lane, FloatRegister lhs, FloatRegister rhs,
                            FloatRegister dest);

  // Shuffles, blends and lane selection.
  void shuffleInt8x16(FloatRegister lhs, FloatRegister rhs,
                      FloatRegister output, const uint8_t lanes[16]);
  void blendInt8x16(FloatRegister lhs, FloatRegister rhs, FloatRegister output,
                    FloatRegister temp, const uint8_t lanes[16]);
  void blendInt16x8(FloatRegister lhs, FloatRegister rhs, FloatRegister output,
                    const uint16_t lanes[8]);
  void laneSelectSimd128(FloatRegister mask, FloatRegister lhs,
                         FloatRegister rhs, FloatRegister output);

  // Lane-wise comparisons (each with a SimdConstant-rhs overload).
  void compareInt8x16(FloatRegister lhs, Operand rhs, Assembler::Condition cond,
                      FloatRegister output);
  void compareInt8x16(Assembler::Condition cond, FloatRegister lhs,
                      const SimdConstant& rhs, FloatRegister dest);
  void compareInt16x8(FloatRegister lhs, Operand rhs, Assembler::Condition cond,
                      FloatRegister output);
  void compareInt16x8(Assembler::Condition cond, FloatRegister lhs,
                      const SimdConstant& rhs, FloatRegister dest);
  void compareInt32x4(FloatRegister lhs, Operand rhs, Assembler::Condition cond,
                      FloatRegister output);
  void compareInt32x4(Assembler::Condition cond, FloatRegister lhs,
                      const SimdConstant& rhs, FloatRegister dest);
  void compareForEqualityInt64x2(FloatRegister lhs, Operand rhs,
                                 Assembler::Condition cond,
                                 FloatRegister output);
  void compareForOrderingInt64x2(FloatRegister lhs, Operand rhs,
                                 Assembler::Condition cond, FloatRegister temp1,
                                 FloatRegister temp2, FloatRegister output);
  void compareForOrderingInt64x2AVX(FloatRegister lhs, FloatRegister rhs,
                                    Assembler::Condition cond,
                                    FloatRegister output);
  void compareFloat32x4(FloatRegister lhs, Operand rhs,
                        Assembler::Condition cond, FloatRegister output);
  void compareFloat32x4(Assembler::Condition cond, FloatRegister lhs,
                        const SimdConstant& rhs, FloatRegister dest);
  void compareFloat64x2(FloatRegister lhs, Operand rhs,
                        Assembler::Condition cond, FloatRegister output);
  void compareFloat64x2(Assembler::Condition cond, FloatRegister lhs,
                        const SimdConstant& rhs, FloatRegister dest);

  // Lane-wise float min/max (plain and AVX variants).
  void minMaxFloat32x4(bool isMin, FloatRegister lhs, Operand rhs,
                       FloatRegister temp1, FloatRegister temp2,
                       FloatRegister output);
  void minMaxFloat32x4AVX(bool isMin, FloatRegister lhs, FloatRegister rhs,
                          FloatRegister temp1, FloatRegister temp2,
                          FloatRegister output);
  void minMaxFloat64x2(bool isMin, FloatRegister lhs, Operand rhs,
                       FloatRegister temp1, FloatRegister temp2,
                       FloatRegister output);
  void minMaxFloat64x2AVX(bool isMin, FloatRegister lhs, FloatRegister rhs,
                          FloatRegister temp1, FloatRegister temp2,
                          FloatRegister output);
  void minFloat32x4(FloatRegister lhs, FloatRegister rhs, FloatRegister temp1,
                    FloatRegister temp2, FloatRegister output);
  void maxFloat32x4(FloatRegister lhs, FloatRegister rhs, FloatRegister temp1,
                    FloatRegister temp2, FloatRegister output);

  void minFloat64x2(FloatRegister lhs, FloatRegister rhs, FloatRegister temp1,
                    FloatRegister temp2, FloatRegister output);
  void maxFloat64x2(FloatRegister lhs, FloatRegister rhs, FloatRegister temp1,
                    FloatRegister temp2, FloatRegister output);

  // Parameterized helper for the Int8x16 shifts below; takes the shift and
  // widening ops as member-function pointers.
  void packedShiftByScalarInt8x16(
      FloatRegister in, Register count, FloatRegister xtmp, FloatRegister dest,
      void (MacroAssemblerX86Shared::*shift)(FloatRegister, FloatRegister,
                                             FloatRegister),
      void (MacroAssemblerX86Shared::*extend)(const Operand&, FloatRegister));

  // Packed shifts by a variable (Register) or immediate (Imm32) count.
  void packedLeftShiftByScalarInt8x16(FloatRegister in, Register count,
                                      FloatRegister xtmp, FloatRegister dest);
  void packedLeftShiftByScalarInt8x16(Imm32 count, FloatRegister src,
                                      FloatRegister dest);
  void packedRightShiftByScalarInt8x16(FloatRegister in, Register count,
                                       FloatRegister xtmp, FloatRegister dest);
  void packedRightShiftByScalarInt8x16(Imm32 count, FloatRegister src,
                                       FloatRegister dest);
  void packedUnsignedRightShiftByScalarInt8x16(FloatRegister in, Register count,
                                               FloatRegister xtmp,
                                               FloatRegister dest);
  void packedUnsignedRightShiftByScalarInt8x16(Imm32 count, FloatRegister src,
                                               FloatRegister dest);

  void packedLeftShiftByScalarInt16x8(FloatRegister in, Register count,
                                      FloatRegister dest);
  void packedRightShiftByScalarInt16x8(FloatRegister in, Register count,
                                       FloatRegister dest);
  void packedUnsignedRightShiftByScalarInt16x8(FloatRegister in, Register count,
                                               FloatRegister dest);

  void packedLeftShiftByScalarInt32x4(FloatRegister in, Register count,
                                      FloatRegister dest);
  void packedRightShiftByScalarInt32x4(FloatRegister in, Register count,
                                       FloatRegister dest);
  void packedUnsignedRightShiftByScalarInt32x4(FloatRegister in, Register count,
                                               FloatRegister dest);
  void packedLeftShiftByScalarInt64x2(FloatRegister in, Register count,
                                      FloatRegister dest);
  void packedRightShiftByScalarInt64x2(FloatRegister in, Register count,
                                       FloatRegister temp, FloatRegister dest);
  void packedRightShiftByScalarInt64x2(Imm32 count, FloatRegister src,
                                       FloatRegister dest);
  void packedUnsignedRightShiftByScalarInt64x2(FloatRegister in, Register count,
                                               FloatRegister dest);
  // Bitwise lane select and per-byte population count.
  void selectSimd128(FloatRegister mask, FloatRegister onTrue,
                     FloatRegister onFalse, FloatRegister temp,
                     FloatRegister output);
  void popcntInt8x16(FloatRegister src, FloatRegister temp,
                     FloatRegister output);
    620  // SIMD inline methods private to the implementation, that appear to be used.
    621 
    622  void loadAlignedSimd128Int(const Operand& src, FloatRegister dest) {
    623    vmovdqa(src, dest);
    624  }
    625  void moveSimd128Int(FloatRegister src, FloatRegister dest) {
    626    if (src != dest) {
    627      vmovdqa(src, dest);
    628    }
    629  }
    630  FloatRegister moveSimd128IntIfNotAVX(FloatRegister src, FloatRegister dest) {
    631    MOZ_ASSERT(src.isSimd128() && dest.isSimd128());
    632    if (HasAVX()) {
    633      return src;
    634    }
    635    moveSimd128Int(src, dest);
    636    return dest;
    637  }
    638  FloatRegister selectDestIfAVX(FloatRegister src, FloatRegister dest) {
    639    MOZ_ASSERT(src.isSimd128() && dest.isSimd128());
    640    return HasAVX() ? dest : src;
    641  }
    642  FaultingCodeOffset loadUnalignedSimd128Int(const Address& src,
    643                                             FloatRegister dest) {
    644    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    645    vmovdqu(Operand(src), dest);
    646    return fco;
    647  }
 // As above, but for a base+index*scale addressing mode.
 FaultingCodeOffset loadUnalignedSimd128Int(const BaseIndex& src,
                                            FloatRegister dest) {
   FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
   vmovdqu(Operand(src), dest);
   return fco;
 }
 // Unaligned 16-byte vector load; no fault offset is recorded here.
 void loadUnalignedSimd128Int(const Operand& src, FloatRegister dest) {
   vmovdqu(src, dest);
 }
 // Unaligned 16-byte vector store. Returns the code offset of the (possibly
 // faulting) store instruction, used by wasm trap handling.
 FaultingCodeOffset storeUnalignedSimd128Int(FloatRegister src,
                                             const Address& dest) {
   FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
   vmovdqu(src, Operand(dest));
   return fco;
 }
 // As above, but for a base+index*scale addressing mode.
 FaultingCodeOffset storeUnalignedSimd128Int(FloatRegister src,
                                             const BaseIndex& dest) {
   FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
   vmovdqu(src, Operand(dest));
   return fco;
 }
 // Unaligned 16-byte vector store; no fault offset is recorded here.
 void storeUnalignedSimd128Int(FloatRegister src, const Operand& dest) {
   vmovdqu(src, dest);
 }
 // Shift each 16-bit lane left by an immediate; the count is masked into
 // the lane-width range 0..15 before encoding.
 void packedLeftShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
   count.value &= 15;
   vpsllw(count, dest, dest);
 }
 // Arithmetic right shift of each 16-bit lane; count masked to 0..15.
 void packedRightShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
   count.value &= 15;
   vpsraw(count, dest, dest);
 }
 // Logical right shift of each 16-bit lane; count masked to 0..15.
 void packedUnsignedRightShiftByScalarInt16x8(Imm32 count,
                                              FloatRegister dest) {
   count.value &= 15;
   vpsrlw(count, dest, dest);
 }
 // Shift each 32-bit lane left by an immediate; count masked to 0..31.
 void packedLeftShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
   count.value &= 31;
   vpslld(count, dest, dest);
 }
 // Arithmetic right shift of each 32-bit lane; count masked to 0..31.
 void packedRightShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
   count.value &= 31;
   vpsrad(count, dest, dest);
 }
 // Logical right shift of each 32-bit lane; count masked to 0..31.
 void packedUnsignedRightShiftByScalarInt32x4(Imm32 count,
                                              FloatRegister dest) {
   count.value &= 31;
   vpsrld(count, dest, dest);
 }
 // Load 16 bytes from a 16-byte-aligned location into `dest` (vmovaps).
 void loadAlignedSimd128Float(const Address& src, FloatRegister dest) {
   vmovaps(Operand(src), dest);
 }
 // Operand overload of the aligned float vector load.
 void loadAlignedSimd128Float(const Operand& src, FloatRegister dest) {
   vmovaps(src, dest);
 }
 // Store 16 bytes to a 16-byte-aligned location (vmovaps).
 void storeAlignedSimd128Float(FloatRegister src, const Address& dest) {
   vmovaps(src, Operand(dest));
 }
    707  void moveSimd128Float(FloatRegister src, FloatRegister dest) {
    708    if (src != dest) {
    709      vmovaps(src, dest);
    710    }
    711  }
    712  FloatRegister moveSimd128FloatIfNotAVX(FloatRegister src,
    713                                         FloatRegister dest) {
    714    MOZ_ASSERT(src.isSimd128() && dest.isSimd128());
    715    if (HasAVX()) {
    716      return src;
    717    }
    718    moveSimd128Float(src, dest);
    719    return dest;
    720  }
    721  FloatRegister moveSimd128FloatIfEqual(FloatRegister src, FloatRegister dest,
    722                                        FloatRegister other) {
    723    MOZ_ASSERT(src.isSimd128() && dest.isSimd128());
    724    if (src != other) {
    725      return src;
    726    }
    727    moveSimd128Float(src, dest);
    728    return dest;
    729  }
    730  FloatRegister moveSimd128FloatIfNotAVXOrOther(FloatRegister src,
    731                                                FloatRegister dest,
    732                                                FloatRegister other) {
    733    MOZ_ASSERT(src.isSimd128() && dest.isSimd128());
    734    if (HasAVX() && src != other) {
    735      return src;
    736    }
    737    moveSimd128Float(src, dest);
    738    return dest;
    739  }
    740 
 // Unaligned 16-byte vector load (vmovups). Returns the code offset of the
 // (possibly faulting) load instruction, used by wasm trap handling.
 FaultingCodeOffset loadUnalignedSimd128(const Operand& src,
                                         FloatRegister dest) {
   FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
   vmovups(src, dest);
   return fco;
 }
 // Unaligned 16-byte vector store (vmovups). Returns the code offset of the
 // (possibly faulting) store instruction, used by wasm trap handling.
 FaultingCodeOffset storeUnalignedSimd128(FloatRegister src,
                                          const Operand& dest) {
   FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
   vmovups(src, dest);
   return fco;
 }
    753 
    754  static uint32_t ComputeShuffleMask(uint32_t x = 0, uint32_t y = 1,
    755                                     uint32_t z = 2, uint32_t w = 3) {
    756    MOZ_ASSERT(x < 4 && y < 4 && z < 4 && w < 4);
    757    uint32_t r = (w << 6) | (z << 4) | (y << 2) | (x << 0);
    758    MOZ_ASSERT(r < 256);
    759    return r;
    760  }
    761 
 // Shuffle the four 32-bit lanes of `src` per `mask` (see ComputeShuffleMask).
 void shuffleInt32(uint32_t mask, FloatRegister src, FloatRegister dest) {
   vpshufd(mask, src, dest);
 }
    765  void moveLowInt32(FloatRegister src, Register dest) { vmovd(src, dest); }
    766 
 // Copy the high 64 bits of `src` into the low 64 bits of `dest` (vmovhlps).
 void moveHighPairToLowPairFloat32(FloatRegister src, FloatRegister dest) {
   vmovhlps(src, dest, dest);
 }
 // Scalar float32 load. Returns the code offset of the (possibly faulting)
 // load instruction, used by wasm trap handling.
 FaultingCodeOffset loadFloat32(const Address& src, FloatRegister dest) {
   FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
   vmovss(src, dest);
   return fco;
 }
 // As above, but for a base+index*scale addressing mode.
 FaultingCodeOffset loadFloat32(const BaseIndex& src, FloatRegister dest) {
   FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
   vmovss(src, dest);
   return fco;
 }
    780  void loadFloat32(const Operand& src, FloatRegister dest) {
    781    switch (src.kind()) {
    782      case Operand::MEM_REG_DISP:
    783        loadFloat32(src.toAddress(), dest);
    784        break;
    785      case Operand::MEM_SCALE:
    786        loadFloat32(src.toBaseIndex(), dest);
    787        break;
    788      default:
    789        MOZ_CRASH("unexpected operand kind");
    790    }
    791  }
 // Scalar float32 register-to-register move.
 void moveFloat32(FloatRegister src, FloatRegister dest) {
   // Use vmovaps instead of vmovss to avoid dependencies.
   vmovaps(src, dest);
 }
    796 
    797  FaultingCodeOffset loadFloat16(const Address& addr, FloatRegister dest,
    798                                 Register scratch) {
    799    auto fco = load16ZeroExtend(addr, scratch);
    800 
    801    // Move from GPR to FloatRegister.
    802    vmovd(scratch, dest);
    803 
    804    return fco;
    805  }
    806  FaultingCodeOffset loadFloat16(const BaseIndex& src, FloatRegister dest,
    807                                 Register scratch) {
    808    auto fco = load16ZeroExtend(src, scratch);
    809 
    810    // Move from GPR to FloatRegister.
    811    vmovd(scratch, dest);
    812 
    813    return fco;
    814  }
    815 
    816  // Checks whether a double is representable as a 32-bit integer. If so, the
    817  // integer is written to the output register. Otherwise, a bailout is taken to
    818  // the given snapshot. This function overwrites the scratch float register.
 void convertDoubleToInt32(FloatRegister src, Register dest, Label* fail,
                           bool negativeZeroCheck = true) {
   // Check for -0.0
   if (negativeZeroCheck) {
     branchNegativeZero(src, dest, fail);
   }

   ScratchDoubleScope scratch(asMasm());
   // Truncating convert, then round-trip the integer back to double and
   // compare against the original to detect inexact conversions.
   vcvttsd2si(src, dest);
   convertInt32ToDouble(dest, scratch);
   vucomisd(scratch, src);
   j(Assembler::Parity, fail);    // Unordered comparison: src was NaN.
   j(Assembler::NotEqual, fail);  // Round trip changed the value: inexact.
 }
    833 
    834  // Checks whether a float32 is representable as a 32-bit integer. If so, the
    835  // integer is written to the output register. Otherwise, a bailout is taken to
    836  // the given snapshot. This function overwrites the scratch float register.
 void convertFloat32ToInt32(FloatRegister src, Register dest, Label* fail,
                            bool negativeZeroCheck = true) {
   // Check for -0.0
   if (negativeZeroCheck) {
     branchNegativeZeroFloat32(src, dest, fail);
   }

   ScratchFloat32Scope scratch(asMasm());
   // Truncating convert, then round-trip the integer back to float32 and
   // compare against the original to detect inexact conversions.
   vcvttss2si(src, dest);
   convertInt32ToFloat32(dest, scratch);
   vucomiss(scratch, src);
   j(Assembler::Parity, fail);    // Unordered comparison: src was NaN.
   j(Assembler::NotEqual, fail);  // Round trip changed the value: inexact.
 }
    851 
 // Truncate a double to int32, branching to `fail` when the conversion
 // overflowed (vcvttsd2si produces 0x80000000 in that case).
 void truncateDoubleToInt32(FloatRegister src, Register dest, Label* fail) {
   // vcvttsd2si returns 0x80000000 on failure. Test for it by
   // subtracting 1 and testing overflow. The other possibility is to test
   // equality for INT_MIN after a comparison, but 1 costs fewer bytes to
   // materialize.
   vcvttsd2si(src, dest);
   cmp32(dest, Imm32(1));
   j(Assembler::Overflow, fail);
 }
 // Float32 counterpart of truncateDoubleToInt32.
 void truncateFloat32ToInt32(FloatRegister src, Register dest, Label* fail) {
   // Same trick as explained in the above comment.
   vcvttss2si(src, dest);
   cmp32(dest, Imm32(1));
   j(Assembler::Overflow, fail);
 }
    867 
    868  inline void clampIntToUint8(Register reg);
    869 
    870  bool maybeInlineDouble(double d, FloatRegister dest) {
    871    // Loading zero with xor is specially optimized in hardware.
    872    if (mozilla::IsPositiveZero(d)) {
    873      zeroDouble(dest);
    874      return true;
    875    }
    876 
    877    // It is also possible to load several common constants using vpcmpeqw
    878    // to get all ones and then vpsllq and vpsrlq to get zeros at the ends,
    879    // as described in "13.4 Generating constants" of
    880    // "2. Optimizing subroutines in assembly language" by Agner Fog, and as
    881    // previously implemented here. However, with x86 and x64 both using
    882    // constant pool loads for double constants, this is probably only
    883    // worthwhile in cases where a load is likely to be delayed.
    884 
    885    return false;
    886  }
    887 
    888  bool maybeInlineFloat(float f, FloatRegister dest) {
    889    // See comment above
    890    if (mozilla::IsPositiveZero(f)) {
    891      zeroFloat32(dest);
    892      return true;
    893    }
    894    return false;
    895  }
    896 
    897  bool maybeInlineSimd128Int(const SimdConstant& v, const FloatRegister& dest) {
    898    if (v.isZeroBits()) {
    899      vpxor(dest, dest, dest);
    900      return true;
    901    }
    902    if (v.isOneBits()) {
    903      vpcmpeqw(Operand(dest), dest, dest);
    904      return true;
    905    }
    906    return false;
    907  }
    908  bool maybeInlineSimd128Float(const SimdConstant& v,
    909                               const FloatRegister& dest) {
    910    if (v.isZeroBits()) {
    911      vxorps(dest, dest, dest);
    912      return true;
    913    }
    914    return false;
    915  }
    916 
 // Widen a C++ bool (1 byte) to a full int32 0/1 value.
 void convertBoolToInt32(Register source, Register dest) {
   // Note that C++ bool is only 1 byte, so zero extend it to clear the
   // higher-order bits.
   movzbl(source, dest);
 }
    922 
    923 private:
 // Returns true when the operand `src` mentions the register `dest`
 // (directly, or as a base/index of an address), in which case `dest`
 // cannot be pre-zeroed before evaluating `src`. NOTE: the constexpr-if
 // chain is order-sensitive: BaseIndex must be tested via its own branch
 // even though the Address test appears earlier.
 template <typename T>
 static bool aliasesEmitSetRegister(T src, Register dest) {
   if constexpr (std::is_base_of_v<Register, T>) {
     return src == dest;
   } else if constexpr (std::is_base_of_v<Address, T>) {
     return src.base == dest;
   } else if constexpr (std::is_base_of_v<BaseIndex, T>) {
     return src.base == dest || src.index == dest;
   } else if constexpr (std::is_base_of_v<ValueOperand, T>) {
     return src.aliases(dest);
   } else {
     // Immediates don't contain any registers that might alias `dest`.
     static_assert(
         std::is_base_of_v<Imm32, T> || std::is_base_of_v<Imm64, T> ||
             std::is_base_of_v<ImmPtr, T> || std::is_base_of_v<ImmGCPtr, T> ||
             std::is_base_of_v<ImmWord, T>,
         "unhandled operand");
     return false;
   }
 }
    944 
    945 public:
    946  bool maybeEmitSetZeroByteRegister(Register dest) {
    947    // `setCC` only writes into the low 8-bits of the register, so it has to be
    948    // followed by `movzbl` to extend i8 to i32. This can cause a register stall
    949    // due to the partial register write performed by `setCC`. If possible zero
    950    // the output before the comparison to avoid this case.
    951    if (AllocatableGeneralRegisterSet(Registers::SingleByteRegs).has(dest)) {
    952      xorl(dest, dest);
    953      return true;
    954    }
    955    return false;
    956  }
    957 
    958  template <typename T>
    959  bool maybeEmitSetZeroByteRegister(T src, Register dest) {
    960    // Can't zero the output register if it aliases the input.
    961    if (!aliasesEmitSetRegister(src, dest)) {
    962      return maybeEmitSetZeroByteRegister(dest);
    963    }
    964    return false;
    965  }
    966 
    967  template <typename T1, typename T2>
    968  bool maybeEmitSetZeroByteRegister(T1 lhs, T2 rhs, Register dest) {
    969    // Can't zero the output register if it aliases an input.
    970    if (!aliasesEmitSetRegister(lhs, dest) &&
    971        !aliasesEmitSetRegister(rhs, dest)) {
    972      return maybeEmitSetZeroByteRegister(dest);
    973    }
    974    return false;
    975  }
    976 
 // Materialize the boolean value of `cond` (0 or 1) into `dest`, based on
 // the current FLAGS. `destIsZero` means the caller already zeroed `dest`
 // before setting FLAGS, so the zero-extension can be skipped. `ifNaN`
 // selects the result when the preceding float compare was unordered
 // (Parity set). FLAGS must stay live across every instruction here.
 void emitSet(Assembler::Condition cond, Register dest, bool destIsZero,
              Assembler::NaNCond ifNaN = Assembler::NaN_HandledByCond) {
   if (AllocatableGeneralRegisterSet(Registers::SingleByteRegs).has(dest)) {
     // If the register we're defining is a single byte register,
     // take advantage of the setCC instruction
     setCC(cond, dest);

     // Extend i8 to i32 if the register wasn't previously zeroed.
     if (!destIsZero) {
       movzbl(dest, dest);
     }

     if (ifNaN != Assembler::NaN_HandledByCond) {
       // Overwrite the result with the NaN outcome when Parity is set.
       Label noNaN;
       j(Assembler::NoParity, &noNaN);
       mov(ImmWord(ifNaN == Assembler::NaN_IsTrue), dest);
       bind(&noNaN);
     }
   } else {
     // `dest` has no 8-bit encoding (x86-32 esi/edi/ebp/esp), so build the
     // 0/1 value with branches instead of setCC.
     Label end;
     Label ifFalse;

     if (ifNaN == Assembler::NaN_IsFalse) {
       j(Assembler::Parity, &ifFalse);
     }
     // Note a subtlety here: FLAGS is live at this point, and the
     // mov interface doesn't guarantee to preserve FLAGS. Use
     // movl instead of mov, because the movl instruction
     // preserves FLAGS.
     movl(Imm32(1), dest);
     j(cond, &end);
     if (ifNaN == Assembler::NaN_IsTrue) {
       j(Assembler::Parity, &end);
     }
     bind(&ifFalse);
     mov(ImmWord(0), dest);

     bind(&end);
   }
 }
   1017 
   1018  // Emit a JMP that can be toggled to a CMP. See ToggleToJmp(), ToggleToCmp().
 CodeOffset toggledJump(Label* label) {
   // Record the offset of the jump so it can later be patched in place.
   CodeOffset offset(size());
   jump(label);
   return offset;
 }
   1024 
 // Compute the address denoted by `address` into `dest` without touching
 // memory or FLAGS (lea).
 template <typename T>
 void computeEffectiveAddress(const T& address, Register dest) {
   lea(Operand(address), dest);
 }
   1029 
 // No-op on x86/x64; the hardware does not require extra alignment checks.
 void checkStackAlignment() {
   // Exists for ARM compatibility.
 }
   1033 
   1034  void abiret() { ret(); }
   1035 
   1036 protected:
   1037  bool buildOOLFakeExitFrame(void* fakeReturnAddr);
   1038 };
   1039 
   1040 }  // namespace jit
   1041 }  // namespace js
   1042 
   1043 #endif /* jit_x86_shared_MacroAssembler_x86_shared_h */