/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef jit_x86_shared_MacroAssembler_x86_shared_h
#define jit_x86_shared_MacroAssembler_x86_shared_h

#if defined(JS_CODEGEN_X86)
#  include "jit/x86/Assembler-x86.h"
#elif defined(JS_CODEGEN_X64)
#  include "jit/x64/Assembler-x64.h"
#endif

using js::wasm::FaultingCodeOffset;

namespace js {
namespace jit {

class MacroAssembler;

class MacroAssemblerX86Shared : public Assembler {
 private:
  // Perform a downcast. Should be removed by Bug 996602.
  MacroAssembler& asMasm();
  const MacroAssembler& asMasm() const;

 public:
#ifdef JS_CODEGEN_X64
  using UsesItem = X86Encoding::JmpSrc;
#else
  using UsesItem = CodeOffset;
#endif

  using UsesVector = Vector<UsesItem, 0, SystemAllocPolicy>;
  static_assert(sizeof(UsesItem) == 4);

 protected:
  template <class T>
  struct Constant {
    using Pod = T;

    T value;
    UsesVector uses;

    explicit Constant(const T& value) : value(value) {}

    // Allow move operations, but not copying.
    Constant(Constant<T>&&) = default;
    Constant(const Constant<T>&) = delete;
  };

  // Containers use SystemAllocPolicy since wasm releases memory after each
  // function is compiled, and these need to live until after all functions
  // are compiled.
  using Double = Constant<double>;
  Vector<Double, 0, SystemAllocPolicy> doubles_;
  using DoubleMap =
      HashMap<double, size_t, DefaultHasher<double>, SystemAllocPolicy>;
  DoubleMap doubleMap_;

  using Float = Constant<float>;
  Vector<Float, 0, SystemAllocPolicy> floats_;
  using FloatMap =
      HashMap<float, size_t, DefaultHasher<float>, SystemAllocPolicy>;
  FloatMap floatMap_;

  struct SimdData : public Constant<SimdConstant> {
    explicit SimdData(SimdConstant d) : Constant<SimdConstant>(d) {}
    SimdData(SimdData&& d) : Constant<SimdConstant>(std::move(d)) {}
    explicit SimdData(const SimdData&) = delete;
    SimdConstant::Type type() const { return value.type(); }
  };

  Vector<SimdData, 0, SystemAllocPolicy> simds_;
  using SimdMap =
      HashMap<SimdConstant, size_t, SimdConstant, SystemAllocPolicy>;
  SimdMap simdMap_;

  template <class T, class Map>
  T* getConstant(const typename T::Pod& value, Map& map,
                 Vector<T, 0, SystemAllocPolicy>& vec);

  Float* getFloat(float f);
  Double* getDouble(double d);
  SimdData* getSimdData(const SimdConstant& v);

 public:
  using Assembler::call;

  MacroAssemblerX86Shared() = default;

  bool appendRawCode(const uint8_t* code, size_t numBytes) {
    return masm.appendRawCode(code, numBytes);
  }

  void addToPCRel4(uint32_t offset, int32_t bias) {
    return masm.addToPCRel4(offset, bias);
  }

  // Evaluate srcDest = minmax<isMax>{Float32,Double}(srcDest, second).
  // Checks for NaN if canBeNaN is true.
  void minMaxDouble(FloatRegister srcDest, FloatRegister second, bool canBeNaN,
                    bool isMax);
  void minMaxFloat32(FloatRegister srcDest, FloatRegister second,
                     bool canBeNaN, bool isMax);
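
  // Conditions that include DoubleConditionBitInvert are implemented by
  // swapping the operands of the vucomisd/vucomiss comparison and testing
  // the mirrored condition code (e.g. DoubleLessThan is "above" on the
  // swapped compare), which is why the operand order flips below.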
  void compareDouble(DoubleCondition cond, FloatRegister lhs,
                     FloatRegister rhs) {
    if (cond & DoubleConditionBitInvert) {
      vucomisd(lhs, rhs);
    } else {
      vucomisd(rhs, lhs);
    }
  }

  void compareFloat(DoubleCondition cond, FloatRegister lhs,
                    FloatRegister rhs) {
    if (cond & DoubleConditionBitInvert) {
      vucomiss(lhs, rhs);
    } else {
      vucomiss(rhs, lhs);
    }
  }

  void branchNegativeZero(FloatRegister reg, Register scratch, Label* label,
                          bool maybeNonZero = true);
  void branchNegativeZeroFloat32(FloatRegister reg, Register scratch,
                                 Label* label);

  void move32(Imm32 imm, Register dest) {
    // Use the ImmWord version of mov to register, which has special
    // optimizations. Casting to uint32_t here ensures that the value
    // is zero-extended.
    mov(ImmWord(uint32_t(imm.value)), dest);
  }
  void move32(Imm32 imm, const Operand& dest) { movl(imm, dest); }
  void move32(Register src, Register dest) { movl(src, dest); }
  void move32(Register src, const Operand& dest) { movl(src, dest); }
  void test32(Register lhs, Register rhs) { testl(rhs, lhs); }
  void test32(const Address& addr, Imm32 imm) { testl(imm, Operand(addr)); }
  void test32(const Operand lhs, Imm32 imm) { testl(imm, lhs); }
  void test32(Register lhs, Imm32 rhs) { testl(rhs, lhs); }
  void cmp32(Register lhs, Imm32 rhs) { cmpl(rhs, lhs); }
  void cmp32(Register lhs, Register rhs) { cmpl(rhs, lhs); }
  void cmp32(const Address& lhs, Register rhs) { cmp32(Operand(lhs), rhs); }
  void cmp32(const Address& lhs, Imm32 rhs) { cmp32(Operand(lhs), rhs); }
  void cmp32(const Operand& lhs, Imm32 rhs) { cmpl(rhs, lhs); }
  void cmp32(const Operand& lhs, Register rhs) { cmpl(rhs, lhs); }
  void cmp32(Register lhs, const Operand& rhs) { cmpl(rhs, lhs); }

  void cmp16(const Address& lhs, Imm32 rhs) { cmp16(Operand(lhs), rhs); }
  void cmp16(const Operand& lhs, Imm32 rhs) { cmpw(rhs, lhs); }

  void cmp8(const Address& lhs, Imm32 rhs) { cmp8(Operand(lhs), rhs); }
  void cmp8(const Operand& lhs, Imm32 rhs) { cmpb(rhs, lhs); }
  void cmp8(const Operand& lhs, Register rhs) { cmpb(rhs, lhs); }

  void atomic_inc32(const Operand& addr) { lock_incl(addr); }
  void atomic_dec32(const Operand& addr) { lock_decl(addr); }

  void branch16(Condition cond, Register lhs, Register rhs, Label* label) {
    cmpw(rhs, lhs);
    j(cond, label);
  }
  void branchTest16(Condition cond, Register lhs, Register rhs, Label* label) {
    testw(rhs, lhs);
    j(cond, label);
  }

  void jump(Label* label) { jmp(label); }
  void jump(JitCode* code) { jmp(code); }
  void jump(TrampolinePtr code) { jmp(ImmPtr(code.value)); }
  void jump(ImmPtr ptr) { jmp(ptr); }
  void jump(Register reg) { jmp(Operand(reg)); }
  void jump(const Address& addr) { jmp(Operand(addr)); }

  void convertInt32ToDouble(Register src, FloatRegister dest) {
    // vcvtsi2sd and friends write only part of their output register, which
    // causes slowdowns on out-of-order processors. Explicitly break
    // dependencies with vxorpd (and vxorps elsewhere), which are handled
    // specially in modern CPUs, for this purpose. See sections 8.14, 9.8,
    // 10.8, 12.9, 13.16, 14.14, and 15.8 of Agner's Microarchitecture
    // document.
    zeroDouble(dest);
    vcvtsi2sd(src, dest, dest);
  }
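
  // For illustration, the pattern above emits (SSE spellings, hypothetical
  // registers):
  //   xorpd %xmm0, %xmm0    ; zeroing idiom, breaks the dependency chain
  //   cvtsi2sd %eax, %xmm0  ; writes only the low 64 bits of %xmm0
  // Without the xorpd, cvtsi2sd would carry a false dependency on whatever
  // instruction last wrote %xmm0.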
  void convertInt32ToDouble(const Address& src, FloatRegister dest) {
    convertInt32ToDouble(Operand(src), dest);
  }
  void convertInt32ToDouble(const BaseIndex& src, FloatRegister dest) {
    convertInt32ToDouble(Operand(src), dest);
  }
  void convertInt32ToDouble(const Operand& src, FloatRegister dest) {
    // Clear the output register first to break dependencies; see above.
    zeroDouble(dest);
    vcvtsi2sd(Operand(src), dest, dest);
  }
  void convertInt32ToFloat32(Register src, FloatRegister dest) {
    // Clear the output register first to break dependencies; see above.
    zeroFloat32(dest);
    vcvtsi2ss(src, dest, dest);
  }
  void convertInt32ToFloat32(const Address& src, FloatRegister dest) {
    convertInt32ToFloat32(Operand(src), dest);
  }
  void convertInt32ToFloat32(const Operand& src, FloatRegister dest) {
    // Clear the output register first to break dependencies; see above.
    zeroFloat32(dest);
    vcvtsi2ss(src, dest, dest);
  }
  Condition testDoubleTruthy(bool truthy, FloatRegister reg) {
    ScratchDoubleScope scratch(asMasm());
    zeroDouble(scratch);
    vucomisd(reg, scratch);
    return truthy ? NonZero : Zero;
  }

  // Class which ensures that registers used in byte ops are compatible with
  // such instructions, even if the original register passed in wasn't. This
  // only applies to x86, as on x64 all registers are valid single byte regs.
  // This doesn't lead to great code but helps to simplify code generation.
  //
  // Note that this can currently only be used in cases where the register is
  // read from by the guarded instruction, not written to.
  class AutoEnsureByteRegister {
    MacroAssemblerX86Shared* masm;
    Register original_;
    Register substitute_;

   public:
    template <typename T>
    AutoEnsureByteRegister(MacroAssemblerX86Shared* masm, T address,
                           Register reg)
        : masm(masm), original_(reg) {
      AllocatableGeneralRegisterSet singleByteRegs(Registers::SingleByteRegs);
      if (singleByteRegs.has(reg)) {
        substitute_ = reg;
      } else {
        MOZ_ASSERT(address.base != StackPointer);
        do {
          substitute_ = singleByteRegs.takeAny();
        } while (Operand(address).containsReg(substitute_));

        masm->push(substitute_);
        masm->mov(reg, substitute_);
      }
    }

    ~AutoEnsureByteRegister() {
      if (original_ != substitute_) {
        masm->pop(substitute_);
      }
    }

    Register reg() { return substitute_; }
  };
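
  // Many of the loads and stores below return a FaultingCodeOffset: the code
  // offset of the memory-touching instruction itself. Wasm records these
  // offsets in its trap metadata so that a hardware fault on a heap access
  // can be traced back to the guilty instruction.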
  void load8ZeroExtend(const Operand& src, Register dest) {
    movzbl(src, dest);
  }
  FaultingCodeOffset load8ZeroExtend(const Address& src, Register dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movzbl(Operand(src), dest);
    return fco;
  }
  FaultingCodeOffset load8ZeroExtend(const BaseIndex& src, Register dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movzbl(Operand(src), dest);
    return fco;
  }
  void load8SignExtend(const Operand& src, Register dest) {
    movsbl(src, dest);
  }
  FaultingCodeOffset load8SignExtend(const Address& src, Register dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movsbl(Operand(src), dest);
    return fco;
  }
  FaultingCodeOffset load8SignExtend(const BaseIndex& src, Register dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movsbl(Operand(src), dest);
    return fco;
  }
  template <typename T>
  void store8(Imm32 src, const T& dest) {
    movb(src, Operand(dest));
  }
  template <typename T>
  FaultingCodeOffset store8(Register src, const T& dest) {
    AutoEnsureByteRegister ensure(this, dest, src);
    // We must read the current offset only after AutoEnsureByteRegister's
    // constructor has done its thing, since it may insert instructions, and
    // we want to get the offset for the `movb` itself.
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movb(ensure.reg(), Operand(dest));
    return fco;
  }
  void load16ZeroExtend(const Operand& src, Register dest) {
    movzwl(src, dest);
  }
  FaultingCodeOffset load16ZeroExtend(const Address& src, Register dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movzwl(Operand(src), dest);
    return fco;
  }
  FaultingCodeOffset load16ZeroExtend(const BaseIndex& src, Register dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movzwl(Operand(src), dest);
    return fco;
  }
  template <typename S>
  void load16UnalignedZeroExtend(const S& src, Register dest) {
    load16ZeroExtend(src, dest);
  }
  template <typename S, typename T>
  FaultingCodeOffset store16(const S& src, const T& dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movw(src, Operand(dest));
    return fco;
  }
  template <typename S, typename T>
  void store16Unaligned(const S& src, const T& dest) {
    store16(src, dest);
  }
  void load16SignExtend(const Operand& src, Register dest) {
    movswl(src, dest);
  }
  FaultingCodeOffset load16SignExtend(const Address& src, Register dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movswl(Operand(src), dest);
    return fco;
  }
  FaultingCodeOffset load16SignExtend(const BaseIndex& src, Register dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movswl(Operand(src), dest);
    return fco;
  }
  template <typename S>
  void load16UnalignedSignExtend(const S& src, Register dest) {
    load16SignExtend(src, dest);
  }
  FaultingCodeOffset load32(const Address& address, Register dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movl(Operand(address), dest);
    return fco;
  }
  FaultingCodeOffset load32(const BaseIndex& src, Register dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movl(Operand(src), dest);
    return fco;
  }
  void load32(const Operand& src, Register dest) { movl(src, dest); }
  template <typename S>
  void load32Unaligned(const S& src, Register dest) {
    load32(src, dest);
  }
  template <typename S, typename T>
  FaultingCodeOffset store32(const S& src, const T& dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    movl(src, Operand(dest));
    return fco;
  }
  template <typename S, typename T>
  void store32Unaligned(const S& src, const T& dest) {
    store32(src, dest);
  }
  FaultingCodeOffset loadDouble(const Address& src, FloatRegister dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    vmovsd(src, dest);
    return fco;
  }
  FaultingCodeOffset loadDouble(const BaseIndex& src, FloatRegister dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    vmovsd(src, dest);
    return fco;
  }
  void loadDouble(const Operand& src, FloatRegister dest) {
    switch (src.kind()) {
      case Operand::MEM_REG_DISP:
        loadDouble(src.toAddress(), dest);
        break;
      case Operand::MEM_SCALE:
        loadDouble(src.toBaseIndex(), dest);
        break;
      default:
        MOZ_CRASH("unexpected operand kind");
    }
  }
  void moveDouble(FloatRegister src, FloatRegister dest) {
    // Use vmovapd instead of vmovsd to avoid dependencies.
    vmovapd(src, dest);
  }
  void zeroDouble(FloatRegister reg) { vxorpd(reg, reg, reg); }
  void zeroFloat32(FloatRegister reg) { vxorps(reg, reg, reg); }
  void convertFloat32ToDouble(FloatRegister src, FloatRegister dest) {
    vcvtss2sd(src, dest, dest);
  }
  void convertDoubleToFloat32(FloatRegister src, FloatRegister dest) {
    vcvtsd2ss(src, dest, dest);
  }
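
  // Note: the float16 conversions below use the F16C instructions vcvtps2ph
  // and vcvtph2ps, which convert only to and from float32. Hence float16 ->
  // double goes through float32, and double -> float16 is not supported on
  // this path.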
  void convertDoubleToFloat16(FloatRegister src, FloatRegister dest) {
    MOZ_CRASH("Not supported for this target");
  }
  void convertFloat16ToDouble(FloatRegister src, FloatRegister dest) {
    convertFloat16ToFloat32(src, dest);
    convertFloat32ToDouble(dest, dest);
  }
  void convertFloat32ToFloat16(FloatRegister src, FloatRegister dest) {
    vcvtps2ph(src, dest);
  }
  void convertFloat16ToFloat32(FloatRegister src, FloatRegister dest) {
    // Zero extend word to quadword. This ensures all high words in the result
    // are zeroed after vcvtph2ps.
    vpmovzxwq(Operand(src), dest);

    // Convert Float16 to Float32.
    vcvtph2ps(dest, dest);
  }
  void convertInt32ToFloat16(Register src, FloatRegister dest) {
    // Clear the output register first to break dependencies; see above.
    //
    // This also ensures all high words in the result are zeroed.
    zeroFloat32(dest);

    // Convert Int32 to Float32.
    vcvtsi2ss(src, dest, dest);

    // Convert Float32 to Float16.
    vcvtps2ph(dest, dest);
  }

  void loadInt32x4(const Address& addr, FloatRegister dest) {
    vmovdqa(Operand(addr), dest);
  }
  void loadFloat32x4(const Address& addr, FloatRegister dest) {
    vmovaps(Operand(addr), dest);
  }
  void storeInt32x4(FloatRegister src, const Address& addr) {
    vmovdqa(src, Operand(addr));
  }
  void storeFloat32x4(FloatRegister src, const Address& addr) {
    vmovaps(src, Operand(addr));
  }
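
  // Worked example for the saturation note below: vcvttps2dq applied to the
  // lanes {1.5f, -1.5f, 2e10f, NaN} produces {1, -1, 0x80000000, 0x80000000},
  // since 2e10 is outside the int32 range and NaN has no integer value.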
  void convertFloat32x4ToInt32x4(FloatRegister src, FloatRegister dest) {
    // Note that if the conversion failed (because the converted
    // result is larger than the maximum signed int32, or less than the
    // least signed int32, or NaN), this will return the undefined integer
    // value (0x80000000).
    vcvttps2dq(src, dest);
  }
  void convertInt32x4ToFloat32x4(FloatRegister src, FloatRegister dest) {
    vcvtdq2ps(src, dest);
  }

  void binarySimd128(const SimdConstant& rhs, FloatRegister lhsDest,
                     void (MacroAssembler::*regOp)(const Operand&,
                                                   FloatRegister,
                                                   FloatRegister),
                     void (MacroAssembler::*constOp)(const SimdConstant&,
                                                     FloatRegister));
  void binarySimd128(
      FloatRegister lhs, const SimdConstant& rhs, FloatRegister dest,
      void (MacroAssembler::*regOp)(const Operand&, FloatRegister,
                                    FloatRegister),
      void (MacroAssembler::*constOp)(const SimdConstant&, FloatRegister,
                                      FloatRegister));
  void binarySimd128(const SimdConstant& rhs, FloatRegister lhsDest,
                     void (MacroAssembler::*regOp)(const Operand&,
                                                   FloatRegister),
                     void (MacroAssembler::*constOp)(const SimdConstant&,
                                                     FloatRegister));

  // SIMD methods, defined in MacroAssembler-x86-shared-SIMD.cpp.

  void unsignedConvertInt32x4ToFloat32x4(FloatRegister src,
                                         FloatRegister dest);
  void unsignedConvertInt32x4ToFloat64x2(FloatRegister src,
                                         FloatRegister dest);
  void bitwiseTestSimd128(const SimdConstant& rhs, FloatRegister lhs);

  void truncSatFloat32x4ToInt32x4(FloatRegister src, FloatRegister dest);
  void unsignedTruncSatFloat32x4ToInt32x4(FloatRegister src,
                                          FloatRegister temp,
                                          FloatRegister dest);
  void unsignedTruncFloat32x4ToInt32x4Relaxed(FloatRegister src,
                                              FloatRegister dest);
  void truncSatFloat64x2ToInt32x4(FloatRegister src, FloatRegister temp,
                                  FloatRegister dest);
  void unsignedTruncSatFloat64x2ToInt32x4(FloatRegister src,
                                          FloatRegister temp,
                                          FloatRegister dest);
  void unsignedTruncFloat64x2ToInt32x4Relaxed(FloatRegister src,
                                              FloatRegister dest);

  void splatX16(Register input, FloatRegister output);
  void splatX8(Register input, FloatRegister output);
  void splatX4(Register input, FloatRegister output);
  void splatX4(FloatRegister input, FloatRegister output);
  void splatX2(FloatRegister input, FloatRegister output);

  void extractLaneInt32x4(FloatRegister input, Register output, unsigned lane);
  void extractLaneFloat32x4(FloatRegister input, FloatRegister output,
                            unsigned lane);
  void extractLaneFloat64x2(FloatRegister input, FloatRegister output,
                            unsigned lane);
  void extractLaneInt16x8(FloatRegister input, Register output, unsigned lane,
                          SimdSign sign);
  void extractLaneInt8x16(FloatRegister input, Register output, unsigned lane,
                          SimdSign sign);

  void replaceLaneFloat32x4(unsigned lane, FloatRegister lhs,
                            FloatRegister rhs, FloatRegister dest);
  void replaceLaneFloat64x2(unsigned lane, FloatRegister lhs,
                            FloatRegister rhs, FloatRegister dest);

  void shuffleInt8x16(FloatRegister lhs, FloatRegister rhs,
                      FloatRegister output, const uint8_t lanes[16]);
  void blendInt8x16(FloatRegister lhs, FloatRegister rhs, FloatRegister output,
                    FloatRegister temp, const uint8_t lanes[16]);
  void blendInt16x8(FloatRegister lhs, FloatRegister rhs, FloatRegister output,
                    const uint16_t lanes[8]);
  void laneSelectSimd128(FloatRegister mask, FloatRegister lhs,
                         FloatRegister rhs, FloatRegister output);

  void compareInt8x16(FloatRegister lhs, Operand rhs,
                      Assembler::Condition cond, FloatRegister output);
  void compareInt8x16(Assembler::Condition cond, FloatRegister lhs,
                      const SimdConstant& rhs, FloatRegister dest);
  void compareInt16x8(FloatRegister lhs, Operand rhs,
                      Assembler::Condition cond, FloatRegister output);
  void compareInt16x8(Assembler::Condition cond, FloatRegister lhs,
                      const SimdConstant& rhs, FloatRegister dest);
  void compareInt32x4(FloatRegister lhs, Operand rhs,
                      Assembler::Condition cond, FloatRegister output);
  void compareInt32x4(Assembler::Condition cond, FloatRegister lhs,
                      const SimdConstant& rhs, FloatRegister dest);
  void compareForEqualityInt64x2(FloatRegister lhs, Operand rhs,
                                 Assembler::Condition cond,
                                 FloatRegister output);
  void compareForOrderingInt64x2(FloatRegister lhs, Operand rhs,
                                 Assembler::Condition cond,
                                 FloatRegister temp1, FloatRegister temp2,
                                 FloatRegister output);
  void compareForOrderingInt64x2AVX(FloatRegister lhs, FloatRegister rhs,
                                    Assembler::Condition cond,
                                    FloatRegister output);
  void compareFloat32x4(FloatRegister lhs, Operand rhs,
                        Assembler::Condition cond, FloatRegister output);
  void compareFloat32x4(Assembler::Condition cond, FloatRegister lhs,
                        const SimdConstant& rhs, FloatRegister dest);
  void compareFloat64x2(FloatRegister lhs, Operand rhs,
                        Assembler::Condition cond, FloatRegister output);
  void compareFloat64x2(Assembler::Condition cond, FloatRegister lhs,
                        const SimdConstant& rhs, FloatRegister dest);

  void minMaxFloat32x4(bool isMin, FloatRegister lhs, Operand rhs,
                       FloatRegister temp1, FloatRegister temp2,
                       FloatRegister output);
  void minMaxFloat32x4AVX(bool isMin, FloatRegister lhs, FloatRegister rhs,
                          FloatRegister temp1, FloatRegister temp2,
                          FloatRegister output);
  void minMaxFloat64x2(bool isMin, FloatRegister lhs, Operand rhs,
                       FloatRegister temp1, FloatRegister temp2,
                       FloatRegister output);
  void minMaxFloat64x2AVX(bool isMin, FloatRegister lhs, FloatRegister rhs,
                          FloatRegister temp1, FloatRegister temp2,
                          FloatRegister output);
  void minFloat32x4(FloatRegister lhs, FloatRegister rhs, FloatRegister temp1,
                    FloatRegister temp2, FloatRegister output);
  void maxFloat32x4(FloatRegister lhs, FloatRegister rhs, FloatRegister temp1,
                    FloatRegister temp2, FloatRegister output);

  void minFloat64x2(FloatRegister lhs, FloatRegister rhs, FloatRegister temp1,
                    FloatRegister temp2, FloatRegister output);
  void maxFloat64x2(FloatRegister lhs, FloatRegister rhs, FloatRegister temp1,
                    FloatRegister temp2, FloatRegister output);
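
  // x86 has no SIMD shift instructions with 8-bit lanes, so the Int8x16
  // shifts below are synthesized; roughly, lanes are widened (via the
  // `extend` callback), shifted at 16-bit width, and repacked, which is why
  // a scratch vector register (xtmp) is needed.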
  void packedShiftByScalarInt8x16(
      FloatRegister in, Register count, FloatRegister xtmp, FloatRegister dest,
      void (MacroAssemblerX86Shared::*shift)(FloatRegister, FloatRegister,
                                             FloatRegister),
      void (MacroAssemblerX86Shared::*extend)(const Operand&, FloatRegister));

  void packedLeftShiftByScalarInt8x16(FloatRegister in, Register count,
                                      FloatRegister xtmp, FloatRegister dest);
  void packedLeftShiftByScalarInt8x16(Imm32 count, FloatRegister src,
                                      FloatRegister dest);
  void packedRightShiftByScalarInt8x16(FloatRegister in, Register count,
                                       FloatRegister xtmp,
                                       FloatRegister dest);
  void packedRightShiftByScalarInt8x16(Imm32 count, FloatRegister src,
                                       FloatRegister dest);
  void packedUnsignedRightShiftByScalarInt8x16(FloatRegister in,
                                               Register count,
                                               FloatRegister xtmp,
                                               FloatRegister dest);
  void packedUnsignedRightShiftByScalarInt8x16(Imm32 count, FloatRegister src,
                                               FloatRegister dest);

  void packedLeftShiftByScalarInt16x8(FloatRegister in, Register count,
                                      FloatRegister dest);
  void packedRightShiftByScalarInt16x8(FloatRegister in, Register count,
                                       FloatRegister dest);
  void packedUnsignedRightShiftByScalarInt16x8(FloatRegister in,
                                               Register count,
                                               FloatRegister dest);

  void packedLeftShiftByScalarInt32x4(FloatRegister in, Register count,
                                      FloatRegister dest);
  void packedRightShiftByScalarInt32x4(FloatRegister in, Register count,
                                       FloatRegister dest);
  void packedUnsignedRightShiftByScalarInt32x4(FloatRegister in,
                                               Register count,
                                               FloatRegister dest);
  void packedLeftShiftByScalarInt64x2(FloatRegister in, Register count,
                                      FloatRegister dest);
  void packedRightShiftByScalarInt64x2(FloatRegister in, Register count,
                                       FloatRegister temp,
                                       FloatRegister dest);
  void packedRightShiftByScalarInt64x2(Imm32 count, FloatRegister src,
                                       FloatRegister dest);
  void packedUnsignedRightShiftByScalarInt64x2(FloatRegister in,
                                               Register count,
                                               FloatRegister dest);
  void selectSimd128(FloatRegister mask, FloatRegister onTrue,
                     FloatRegister onFalse, FloatRegister temp,
                     FloatRegister output);
  void popcntInt8x16(FloatRegister src, FloatRegister temp,
                     FloatRegister output);

  // SIMD inline methods private to the implementation, that appear to be
  // used.

  void loadAlignedSimd128Int(const Operand& src, FloatRegister dest) {
    vmovdqa(src, dest);
  }
  void moveSimd128Int(FloatRegister src, FloatRegister dest) {
    if (src != dest) {
      vmovdqa(src, dest);
    }
  }
  FloatRegister moveSimd128IntIfNotAVX(FloatRegister src, FloatRegister dest) {
    MOZ_ASSERT(src.isSimd128() && dest.isSimd128());
    if (HasAVX()) {
      return src;
    }
    moveSimd128Int(src, dest);
    return dest;
  }
  FloatRegister selectDestIfAVX(FloatRegister src, FloatRegister dest) {
    MOZ_ASSERT(src.isSimd128() && dest.isSimd128());
    return HasAVX() ? dest : src;
  }
  FaultingCodeOffset loadUnalignedSimd128Int(const Address& src,
                                             FloatRegister dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    vmovdqu(Operand(src), dest);
    return fco;
  }
  FaultingCodeOffset loadUnalignedSimd128Int(const BaseIndex& src,
                                             FloatRegister dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    vmovdqu(Operand(src), dest);
    return fco;
  }
  void loadUnalignedSimd128Int(const Operand& src, FloatRegister dest) {
    vmovdqu(src, dest);
  }
  FaultingCodeOffset storeUnalignedSimd128Int(FloatRegister src,
                                              const Address& dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    vmovdqu(src, Operand(dest));
    return fco;
  }
  FaultingCodeOffset storeUnalignedSimd128Int(FloatRegister src,
                                              const BaseIndex& dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    vmovdqu(src, Operand(dest));
    return fco;
  }
  void storeUnalignedSimd128Int(FloatRegister src, const Operand& dest) {
    vmovdqu(src, dest);
  }
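
  // The immediate shift helpers below mask the count to the lane width
  // (& 15 for 16-bit lanes, & 31 for 32-bit lanes), matching wasm's
  // semantics of taking shift counts modulo the lane size; the raw hardware
  // instructions would instead flush oversized counts to all-zeros (or to
  // the sign bits, for arithmetic right shifts).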
  void packedLeftShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
    count.value &= 15;
    vpsllw(count, dest, dest);
  }
  void packedRightShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
    count.value &= 15;
    vpsraw(count, dest, dest);
  }
  void packedUnsignedRightShiftByScalarInt16x8(Imm32 count,
                                               FloatRegister dest) {
    count.value &= 15;
    vpsrlw(count, dest, dest);
  }
  void packedLeftShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
    count.value &= 31;
    vpslld(count, dest, dest);
  }
  void packedRightShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
    count.value &= 31;
    vpsrad(count, dest, dest);
  }
  void packedUnsignedRightShiftByScalarInt32x4(Imm32 count,
                                               FloatRegister dest) {
    count.value &= 31;
    vpsrld(count, dest, dest);
  }
  void loadAlignedSimd128Float(const Address& src, FloatRegister dest) {
    vmovaps(Operand(src), dest);
  }
  void loadAlignedSimd128Float(const Operand& src, FloatRegister dest) {
    vmovaps(src, dest);
  }
  void storeAlignedSimd128Float(FloatRegister src, const Address& dest) {
    vmovaps(src, Operand(dest));
  }
  void moveSimd128Float(FloatRegister src, FloatRegister dest) {
    if (src != dest) {
      vmovaps(src, dest);
    }
  }
  FloatRegister moveSimd128FloatIfNotAVX(FloatRegister src,
                                         FloatRegister dest) {
    MOZ_ASSERT(src.isSimd128() && dest.isSimd128());
    if (HasAVX()) {
      return src;
    }
    moveSimd128Float(src, dest);
    return dest;
  }
  FloatRegister moveSimd128FloatIfEqual(FloatRegister src, FloatRegister dest,
                                        FloatRegister other) {
    MOZ_ASSERT(src.isSimd128() && dest.isSimd128());
    if (src != other) {
      return src;
    }
    moveSimd128Float(src, dest);
    return dest;
  }
  FloatRegister moveSimd128FloatIfNotAVXOrOther(FloatRegister src,
                                                FloatRegister dest,
                                                FloatRegister other) {
    MOZ_ASSERT(src.isSimd128() && dest.isSimd128());
    if (HasAVX() && src != other) {
      return src;
    }
    moveSimd128Float(src, dest);
    return dest;
  }

  FaultingCodeOffset loadUnalignedSimd128(const Operand& src,
                                          FloatRegister dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    vmovups(src, dest);
    return fco;
  }
  FaultingCodeOffset storeUnalignedSimd128(FloatRegister src,
                                           const Operand& dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    vmovups(src, dest);
    return fco;
  }
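
  // The PSHUFD-style mask packs one 2-bit source-lane index per destination
  // lane, lane 0 in the low bits. E.g. ComputeShuffleMask(2, 3, 0, 1) ==
  // 0b01001110 == 0x4E, which swaps the low and high 64-bit halves of the
  // vector.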
  static uint32_t ComputeShuffleMask(uint32_t x = 0, uint32_t y = 1,
                                     uint32_t z = 2, uint32_t w = 3) {
    MOZ_ASSERT(x < 4 && y < 4 && z < 4 && w < 4);
    uint32_t r = (w << 6) | (z << 4) | (y << 2) | (x << 0);
    MOZ_ASSERT(r < 256);
    return r;
  }

  void shuffleInt32(uint32_t mask, FloatRegister src, FloatRegister dest) {
    vpshufd(mask, src, dest);
  }
  void moveLowInt32(FloatRegister src, Register dest) { vmovd(src, dest); }

  void moveHighPairToLowPairFloat32(FloatRegister src, FloatRegister dest) {
    vmovhlps(src, dest, dest);
  }
  FaultingCodeOffset loadFloat32(const Address& src, FloatRegister dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    vmovss(src, dest);
    return fco;
  }
  FaultingCodeOffset loadFloat32(const BaseIndex& src, FloatRegister dest) {
    FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
    vmovss(src, dest);
    return fco;
  }
  void loadFloat32(const Operand& src, FloatRegister dest) {
    switch (src.kind()) {
      case Operand::MEM_REG_DISP:
        loadFloat32(src.toAddress(), dest);
        break;
      case Operand::MEM_SCALE:
        loadFloat32(src.toBaseIndex(), dest);
        break;
      default:
        MOZ_CRASH("unexpected operand kind");
    }
  }
  void moveFloat32(FloatRegister src, FloatRegister dest) {
    // Use vmovaps instead of vmovss to avoid dependencies.
    vmovaps(src, dest);
  }

  FaultingCodeOffset loadFloat16(const Address& addr, FloatRegister dest,
                                 Register scratch) {
    auto fco = load16ZeroExtend(addr, scratch);

    // Move from GPR to FloatRegister.
    vmovd(scratch, dest);

    return fco;
  }
  FaultingCodeOffset loadFloat16(const BaseIndex& src, FloatRegister dest,
                                 Register scratch) {
    auto fco = load16ZeroExtend(src, scratch);

    // Move from GPR to FloatRegister.
    vmovd(scratch, dest);

    return fco;
  }

  // Checks whether a double is representable as a 32-bit integer. If so, the
  // integer is written to the output register. Otherwise, a bailout is taken
  // to the given snapshot. This function overwrites the scratch float
  // register.
  void convertDoubleToInt32(FloatRegister src, Register dest, Label* fail,
                            bool negativeZeroCheck = true) {
    // Check for -0.0
    if (negativeZeroCheck) {
      branchNegativeZero(src, dest, fail);
    }

    ScratchDoubleScope scratch(asMasm());
    vcvttsd2si(src, dest);
    convertInt32ToDouble(dest, scratch);
    vucomisd(scratch, src);
    j(Assembler::Parity, fail);
    j(Assembler::NotEqual, fail);
  }
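
  // In the round-trip checks above and below, j(Parity) rejects NaN inputs
  // (vucomisd/vucomiss set PF on an unordered comparison), and j(NotEqual)
  // rejects inputs that didn't convert exactly, e.g. values with a
  // fractional part.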
  // Checks whether a float32 is representable as a 32-bit integer. If so, the
  // integer is written to the output register. Otherwise, a bailout is taken
  // to the given snapshot. This function overwrites the scratch float
  // register.
  void convertFloat32ToInt32(FloatRegister src, Register dest, Label* fail,
                             bool negativeZeroCheck = true) {
    // Check for -0.0
    if (negativeZeroCheck) {
      branchNegativeZeroFloat32(src, dest, fail);
    }

    ScratchFloat32Scope scratch(asMasm());
    vcvttss2si(src, dest);
    convertInt32ToFloat32(dest, scratch);
    vucomiss(scratch, src);
    j(Assembler::Parity, fail);
    j(Assembler::NotEqual, fail);
  }

  void truncateDoubleToInt32(FloatRegister src, Register dest, Label* fail) {
    // vcvttsd2si returns 0x80000000 on failure. Test for it by
    // subtracting 1 and testing overflow. The other possibility is to test
    // equality for INT_MIN after a comparison, but 1 costs fewer bytes to
    // materialize.
    vcvttsd2si(src, dest);
    cmp32(dest, Imm32(1));
    j(Assembler::Overflow, fail);
  }
  void truncateFloat32ToInt32(FloatRegister src, Register dest, Label* fail) {
    // Same trick as explained in the above comment.
    vcvttss2si(src, dest);
    cmp32(dest, Imm32(1));
    j(Assembler::Overflow, fail);
  }

  inline void clampIntToUint8(Register reg);

  bool maybeInlineDouble(double d, FloatRegister dest) {
    // Loading zero with xor is specially optimized in hardware.
    if (mozilla::IsPositiveZero(d)) {
      zeroDouble(dest);
      return true;
    }

    // It is also possible to load several common constants using vpcmpeqw
    // to get all ones and then vpsllq and vpsrlq to get zeros at the ends,
    // as described in "13.4 Generating constants" of
    // "2. Optimizing subroutines in assembly language" by Agner Fog, and as
    // previously implemented here. However, with x86 and x64 both using
    // constant pool loads for double constants, this is probably only
    // worthwhile in cases where a load is likely to be delayed.

    return false;
  }

  bool maybeInlineFloat(float f, FloatRegister dest) {
    // See comment above
    if (mozilla::IsPositiveZero(f)) {
      zeroFloat32(dest);
      return true;
    }
    return false;
  }

  bool maybeInlineSimd128Int(const SimdConstant& v,
                             const FloatRegister& dest) {
    if (v.isZeroBits()) {
      vpxor(dest, dest, dest);
      return true;
    }
    if (v.isOneBits()) {
      vpcmpeqw(Operand(dest), dest, dest);
      return true;
    }
    return false;
  }
  bool maybeInlineSimd128Float(const SimdConstant& v,
                               const FloatRegister& dest) {
    if (v.isZeroBits()) {
      vxorps(dest, dest, dest);
      return true;
    }
    return false;
  }

  void convertBoolToInt32(Register source, Register dest) {
    // Note that C++ bool is only 1 byte, so zero extend it to clear the
    // higher-order bits.
    movzbl(source, dest);
  }

 private:
  template <typename T>
  static bool aliasesEmitSetRegister(T src, Register dest) {
    if constexpr (std::is_base_of_v<Register, T>) {
      return src == dest;
    } else if constexpr (std::is_base_of_v<Address, T>) {
      return src.base == dest;
    } else if constexpr (std::is_base_of_v<BaseIndex, T>) {
      return src.base == dest || src.index == dest;
    } else if constexpr (std::is_base_of_v<ValueOperand, T>) {
      return src.aliases(dest);
    } else {
      // Immediates don't contain any registers that might alias `dest`.
      static_assert(
          std::is_base_of_v<Imm32, T> || std::is_base_of_v<Imm64, T> ||
              std::is_base_of_v<ImmPtr, T> || std::is_base_of_v<ImmGCPtr, T> ||
              std::is_base_of_v<ImmWord, T>,
          "unhandled operand");
      return false;
    }
  }
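
  // Why pre-zeroing the destination helps (register choices hypothetical):
  //   xorl %eax, %eax    ; must precede the flag-setting compare
  //   cmpl %ebx, %ecx
  //   sete %al           ; upper 24 bits are already zero
  // versus the general form
  //   cmpl %ebx, %ecx
  //   sete %al
  //   movzbl %al, %eax   ; extra instruction and a partial-register stall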
 public:
  bool maybeEmitSetZeroByteRegister(Register dest) {
    // `setCC` only writes into the low 8-bits of the register, so it has to
    // be followed by `movzbl` to extend i8 to i32. This can cause a register
    // stall due to the partial register write performed by `setCC`. If
    // possible, zero the output before the comparison to avoid this case.
    if (AllocatableGeneralRegisterSet(Registers::SingleByteRegs).has(dest)) {
      xorl(dest, dest);
      return true;
    }
    return false;
  }

  template <typename T>
  bool maybeEmitSetZeroByteRegister(T src, Register dest) {
    // Can't zero the output register if it aliases the input.
    if (!aliasesEmitSetRegister(src, dest)) {
      return maybeEmitSetZeroByteRegister(dest);
    }
    return false;
  }

  template <typename T1, typename T2>
  bool maybeEmitSetZeroByteRegister(T1 lhs, T2 rhs, Register dest) {
    // Can't zero the output register if it aliases an input.
    if (!aliasesEmitSetRegister(lhs, dest) &&
        !aliasesEmitSetRegister(rhs, dest)) {
      return maybeEmitSetZeroByteRegister(dest);
    }
    return false;
  }

  void emitSet(Assembler::Condition cond, Register dest, bool destIsZero,
               Assembler::NaNCond ifNaN = Assembler::NaN_HandledByCond) {
    if (AllocatableGeneralRegisterSet(Registers::SingleByteRegs).has(dest)) {
      // If the register we're defining is a single byte register,
      // take advantage of the setCC instruction.
      setCC(cond, dest);

      // Extend i8 to i32 if the register wasn't previously zeroed.
      if (!destIsZero) {
        movzbl(dest, dest);
      }

      if (ifNaN != Assembler::NaN_HandledByCond) {
        Label noNaN;
        j(Assembler::NoParity, &noNaN);
        mov(ImmWord(ifNaN == Assembler::NaN_IsTrue), dest);
        bind(&noNaN);
      }
    } else {
      Label end;
      Label ifFalse;

      if (ifNaN == Assembler::NaN_IsFalse) {
        j(Assembler::Parity, &ifFalse);
      }
      // Note a subtlety here: FLAGS is live at this point, and the
      // mov interface doesn't guarantee to preserve FLAGS. Use
      // movl instead of mov, because the movl instruction
      // preserves FLAGS.
      movl(Imm32(1), dest);
      j(cond, &end);
      if (ifNaN == Assembler::NaN_IsTrue) {
        j(Assembler::Parity, &end);
      }
      bind(&ifFalse);
      mov(ImmWord(0), dest);

      bind(&end);
    }
  }

  // Emit a JMP that can be toggled to a CMP. See ToggleToJmp(),
  // ToggleToCmp().
  CodeOffset toggledJump(Label* label) {
    CodeOffset offset(size());
    jump(label);
    return offset;
  }

  template <typename T>
  void computeEffectiveAddress(const T& address, Register dest) {
    lea(Operand(address), dest);
  }

  void checkStackAlignment() {
    // Exists for ARM compatibility.
  }

  void abiret() { ret(); }

 protected:
  bool buildOOLFakeExitFrame(void* fakeReturnAddr);
};

}  // namespace jit
}  // namespace js

#endif /* jit_x86_shared_MacroAssembler_x86_shared_h */