MacroAssembler-x86-shared.cpp (76480B)
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "jit/x86-shared/MacroAssembler-x86-shared.h"

#include "mozilla/Casting.h"

#include "jsmath.h"

#include "jit/JitFrames.h"
#include "jit/MacroAssembler.h"
#include "js/ScalarType.h"  // js::Scalar::Type

#include "jit/MacroAssembler-inl.h"

using namespace js;
using namespace js::jit;

// Clamp `input` to the uint8 range [0, 255], rounding ties to even
// (<= 0 and NaN map to 0, values > 255 map to 255).
// Note: this function clobbers the input register.
void MacroAssembler::clampDoubleToUint8(FloatRegister input, Register output) {
  ScratchDoubleScope scratch(*this);
  MOZ_ASSERT(input != scratch);
  Label positive, done;

  // <= 0 or NaN --> 0
  zeroDouble(scratch);
  branchDouble(DoubleGreaterThan, input, scratch, &positive);
  {
    move32(Imm32(0), output);
    jump(&done);
  }

  bind(&positive);

  if (HasRoundInstruction(RoundingMode::NearestTiesToEven)) {
    // Round input to nearest integer.
    nearbyIntDouble(RoundingMode::NearestTiesToEven, input, input);

    // Truncate to int32 and ensure the result <= 255. This relies on the
    // processor setting output to a value > 255 for doubles outside the int32
    // range (for instance 0x80000000).
    vcvttsd2si(input, output);
    branch32(Assembler::BelowOrEqual, output, Imm32(255), &done);
    move32(Imm32(255), output);
  } else {
    // No SSE4.1 round instruction: truncate, then manually implement
    // round-half-to-even by inspecting the fractional part.
    Label outOfRange;

    // Truncate to int32 and ensure the result <= 255. This relies on the
    // processor setting output to a value > 255 for doubles outside the int32
    // range (for instance 0x80000000).
    vcvttsd2si(input, output);
    branch32(Assembler::AboveOrEqual, output, Imm32(255), &outOfRange);
    {
      // Check if we had a tie.
      convertInt32ToDouble(output, scratch);
      subDouble(scratch, input);

      loadConstantDouble(0.5, scratch);

      Label roundUp;
      vucomisd(scratch, input);
      j(Above, &roundUp);
      j(NotEqual, &done);

      // It was a tie. Round up if the output is odd.
      branchTest32(Zero, output, Imm32(1), &done);

      bind(&roundUp);
      add32(Imm32(1), output);
      jump(&done);
    }

    // > 255 --> 255
    bind(&outOfRange);
    move32(Imm32(255), output);
  }

  bind(&done);
}

// Push a minimal fake exit frame: an IonJS frame descriptor, the fake
// return address, and the caller's frame pointer. Always succeeds.
bool MacroAssemblerX86Shared::buildOOLFakeExitFrame(void* fakeReturnAddr) {
  asMasm().Push(FrameDescriptor(FrameType::IonJS));
  asMasm().Push(ImmPtr(fakeReturnAddr));
  asMasm().Push(FramePointer);
  return true;
}

void MacroAssemblerX86Shared::branchNegativeZero(FloatRegister reg,
                                                 Register scratch, Label* label,
                                                 bool maybeNonZero) {
  // Determines whether the low double contained in the XMM register reg
  // is equal to -0.0.

#if defined(JS_CODEGEN_X86)
  Label nonZero;

  // if not already compared to zero
  if (maybeNonZero) {
    ScratchDoubleScope scratchDouble(asMasm());

    // Compare to zero. Lets through {0, -0}.
    zeroDouble(scratchDouble);

    // If reg is non-zero, jump to nonZero.
    asMasm().branchDouble(DoubleNotEqual, reg, scratchDouble, &nonZero);
  }
  // Input register is either zero or negative zero. Retrieve sign of input.
  vmovmskpd(reg, scratch);

  // If reg is 1 or 3, input is negative zero.
  // If reg is 0 or 2, input is a normal zero.
  asMasm().branchTest32(NonZero, scratch, Imm32(1), label);

  bind(&nonZero);
#elif defined(JS_CODEGEN_X64)
  // The bit pattern of -0.0 is 0x8000000000000000, i.e. INT64_MIN, and
  // INT64_MIN is the only value for which `x - 1` sets the overflow flag.
  vmovq(reg, scratch);
  cmpq(Imm32(1), scratch);
  j(Overflow, label);
#endif
}

// Branch to `label` if the low float in `reg` is exactly -0.0f. Same
// overflow trick as above: -0.0f's bit pattern is INT32_MIN.
void MacroAssemblerX86Shared::branchNegativeZeroFloat32(FloatRegister reg,
                                                        Register scratch,
                                                        Label* label) {
  vmovd(reg, scratch);
  cmp32(scratch, Imm32(1));
  j(Overflow, label);
}

MacroAssembler& MacroAssemblerX86Shared::asMasm() {
  return *static_cast<MacroAssembler*>(this);
}

const MacroAssembler& MacroAssemblerX86Shared::asMasm() const {
  return *static_cast<const MacroAssembler*>(this);
}

// Return a pointer to the pooled constant equal to `value`, appending a new
// entry to `vec` (and recording its index in `map`) the first time a given
// value is seen. Returns nullptr on OOM (and latches enoughMemory_ false).
template <class T, class Map>
T* MacroAssemblerX86Shared::getConstant(const typename T::Pod& value, Map& map,
                                        Vector<T, 0, SystemAllocPolicy>& vec) {
  using AddPtr = typename Map::AddPtr;
  size_t index;
  if (AddPtr p = map.lookupForAdd(value)) {
    index = p->value();
  } else {
    index = vec.length();
    enoughMemory_ &= vec.append(T(value));
    if (!enoughMemory_) {
      return nullptr;
    }
    enoughMemory_ &= map.add(p, value, index);
    if (!enoughMemory_) {
      return nullptr;
    }
  }
  return &vec[index];
}

MacroAssemblerX86Shared::Float* MacroAssemblerX86Shared::getFloat(float f) {
  return getConstant<Float, FloatMap>(f, floatMap_, floats_);
}

MacroAssemblerX86Shared::Double* MacroAssemblerX86Shared::getDouble(double d) {
  return getConstant<Double, DoubleMap>(d, doubleMap_, doubles_);
}

MacroAssemblerX86Shared::SimdData* MacroAssemblerX86Shared::getSimdData(
    const SimdConstant& v) {
  return getConstant<SimdData, SimdMap>(v, simdMap_, simds_);
}

// Emit `lhsDest = op(lhsDest, rhs)` for a SIMD constant rhs: if the constant
// can be synthesized directly into the scratch register, use the reg-reg
// form; otherwise fall back to the memory-constant form.
void MacroAssemblerX86Shared::binarySimd128(
    const SimdConstant& rhs, FloatRegister lhsDest,
    void (MacroAssembler::*regOp)(const Operand&, FloatRegister, FloatRegister),
    void (MacroAssembler::*constOp)(const SimdConstant&, FloatRegister)) {
  ScratchSimd128Scope scratch(asMasm());
  if (maybeInlineSimd128Int(rhs, scratch)) {
    (asMasm().*regOp)(Operand(scratch), lhsDest, lhsDest);
  } else {
    (asMasm().*constOp)(rhs, lhsDest);
  }
}

// Three-operand variant: `dest = op(lhs, rhs)` with a SIMD constant rhs.
void MacroAssemblerX86Shared::binarySimd128(
    FloatRegister lhs, const SimdConstant& rhs, FloatRegister dest,
    void (MacroAssembler::*regOp)(const Operand&, FloatRegister, FloatRegister),
    void (MacroAssembler::*constOp)(const SimdConstant&, FloatRegister,
                                    FloatRegister)) {
  ScratchSimd128Scope scratch(asMasm());
  if (maybeInlineSimd128Int(rhs, scratch)) {
    (asMasm().*regOp)(Operand(scratch), lhs, dest);
  } else {
    (asMasm().*constOp)(rhs, lhs, dest);
  }
}

// Two-operand variant (op reads and writes lhs) with a SIMD constant rhs.
void MacroAssemblerX86Shared::binarySimd128(
    const SimdConstant& rhs, FloatRegister lhs,
    void (MacroAssembler::*regOp)(const Operand&, FloatRegister),
    void (MacroAssembler::*constOp)(const SimdConstant&, FloatRegister)) {
  ScratchSimd128Scope scratch(asMasm());
  if (maybeInlineSimd128Int(rhs, scratch)) {
    (asMasm().*regOp)(Operand(scratch), lhs);
  } else {
    (asMasm().*constOp)(rhs, lhs);
  }
}

// Emit a vptest of `lhs` against a SIMD constant (sets flags only).
void MacroAssemblerX86Shared::bitwiseTestSimd128(const SimdConstant& rhs,
                                                 FloatRegister lhs) {
  ScratchSimd128Scope scratch(asMasm());
  if (maybeInlineSimd128Int(rhs, scratch)) {
    vptest(scratch, lhs);
  } else {
    asMasm().vptestSimd128(rhs, lhs);
  }
}

void MacroAssemblerX86Shared::minMaxDouble(FloatRegister first,
                                           FloatRegister second, bool canBeNaN,
                                           bool isMax) {
  Label done, nan, minMaxInst;

  // Do a vucomisd to catch equality and NaNs, which both require special
  // handling. If the operands are ordered and inequal, we branch straight to
  // the min/max instruction. If we wanted, we could also branch for less-than
  // or greater-than here instead of using min/max, however these conditions
  // will sometimes be hard on the branch predictor.
  vucomisd(second, first);
  j(Assembler::NotEqual, &minMaxInst);
  if (canBeNaN) {
    j(Assembler::Parity, &nan);
  }

  // Ordered and equal. The operands are bit-identical unless they are zero
  // and negative zero. These instructions merge the sign bits in that
  // case, and are no-ops otherwise.
  if (isMax) {
    vandpd(second, first, first);
  } else {
    vorpd(second, first, first);
  }
  jump(&done);

  // x86's min/max are not symmetric; if either operand is a NaN, they return
  // the read-only operand. We need to return a NaN if either operand is a
  // NaN, so we explicitly check for a NaN in the read-write operand.
  if (canBeNaN) {
    bind(&nan);
    vucomisd(first, first);
    j(Assembler::Parity, &done);
  }

  // When the values are inequal, or second is NaN, x86's min and max will
  // return the value we need.
  bind(&minMaxInst);
  if (isMax) {
    vmaxsd(second, first, first);
  } else {
    vminsd(second, first, first);
  }

  bind(&done);
}

// Single-precision twin of minMaxDouble above; same zero/NaN handling.
void MacroAssemblerX86Shared::minMaxFloat32(FloatRegister first,
                                            FloatRegister second, bool canBeNaN,
                                            bool isMax) {
  Label done, nan, minMaxInst;

  // Do a vucomiss to catch equality and NaNs, which both require special
  // handling. If the operands are ordered and inequal, we branch straight to
  // the min/max instruction. If we wanted, we could also branch for less-than
  // or greater-than here instead of using min/max, however these conditions
  // will sometimes be hard on the branch predictor.
  vucomiss(second, first);
  j(Assembler::NotEqual, &minMaxInst);
  if (canBeNaN) {
    j(Assembler::Parity, &nan);
  }

  // Ordered and equal. The operands are bit-identical unless they are zero
  // and negative zero. These instructions merge the sign bits in that
  // case, and are no-ops otherwise.
  if (isMax) {
    vandps(second, first, first);
  } else {
    vorps(second, first, first);
  }
  jump(&done);

  // x86's min/max are not symmetric; if either operand is a NaN, they return
  // the read-only operand. We need to return a NaN if either operand is a
  // NaN, so we explicitly check for a NaN in the read-write operand.
  if (canBeNaN) {
    bind(&nan);
    vucomiss(first, first);
    j(Assembler::Parity, &done);
  }

  // When the values are inequal, or second is NaN, x86's min and max will
  // return the value we need.
  bind(&minMaxInst);
  if (isMax) {
    vmaxss(second, first, first);
  } else {
    vminss(second, first, first);
  }

  bind(&done);
}

#ifdef ENABLE_WASM_SIMD
// Shift counts for wasm SIMD shifts must always be masked on x86; return the
// lane-width-dependent mask (lane bits - 1) for the given shift opcode.
bool MacroAssembler::MustMaskShiftCountSimd128(wasm::SimdOp op, int32_t* mask) {
  switch (op) {
    case wasm::SimdOp::I8x16Shl:
    case wasm::SimdOp::I8x16ShrU:
    case wasm::SimdOp::I8x16ShrS:
      *mask = 7;
      break;
    case wasm::SimdOp::I16x8Shl:
    case wasm::SimdOp::I16x8ShrU:
    case wasm::SimdOp::I16x8ShrS:
      *mask = 15;
      break;
    case wasm::SimdOp::I32x4Shl:
    case wasm::SimdOp::I32x4ShrU:
    case wasm::SimdOp::I32x4ShrS:
      *mask = 31;
      break;
    case wasm::SimdOp::I64x2Shl:
    case wasm::SimdOp::I64x2ShrU:
    case wasm::SimdOp::I64x2ShrS:
      *mask = 63;
      break;
    default:
      MOZ_CRASH("Unexpected shift operation");
  }
  return true;
}
#endif

//{{{ check_macroassembler_style
// ===============================================================
// MacroAssembler high-level usage.

void MacroAssembler::flush() {}

void MacroAssembler::comment(const char* msg) { masm.comment(msg); }

// This operation really consists of five phases, in order to enforce the
// restriction that on x86_shared, the dividend must be eax and both eax and
// edx will be clobbered.
//
// Input: { lhs, rhs }
//
//  [PUSH] Preserve registers
//  [MOVE] Generate moves to specific registers
//
//  [DIV] Input: { regForRhs, EAX }
//  [DIV] extend EAX into EDX
//  [DIV] x86 Division operator
//  [DIV] Output: { EAX, EDX }
//
//  [MOVE] Move specific registers to outputs
//  [POP] Restore registers
//
// Output: { quotientOutput, remainderOutput }
static void EmitDivMod32(MacroAssembler& masm, Register lhs, Register rhs,
                         Register divOutput, Register remOutput,
                         bool isUnsigned) {
  // x/x == 1 with remainder 0; handle it without emitting a division, which
  // also sidesteps aliasing complications below.
  if (lhs == rhs) {
    if (divOutput != Register::Invalid()) {
      masm.movl(Imm32(1), divOutput);
    }
    if (remOutput != Register::Invalid()) {
      masm.movl(Imm32(0), remOutput);
    }
    return;
  }

  // Choose a register that is not edx or eax to hold the rhs;
  // ebx is chosen arbitrarily, and will be preserved if necessary.
  Register regForRhs = (rhs == eax || rhs == edx) ? ebx : rhs;

  // Add registers we will be clobbering as live, but
  // also remove the set we do not restore.
  LiveRegisterSet preserve;
  preserve.add(edx);
  preserve.add(eax);
  if (rhs != regForRhs) {
    preserve.add(regForRhs);
  }

  if (divOutput != Register::Invalid()) {
    preserve.takeUnchecked(divOutput);
  }
  if (remOutput != Register::Invalid()) {
    preserve.takeUnchecked(remOutput);
  }

  masm.PushRegsInMask(preserve);

  // Shuffle input into place.
  masm.moveRegPair(lhs, rhs, eax, regForRhs);

  // Sign extend eax into edx to make (edx:eax): idiv/udiv are 64-bit.
  if (isUnsigned) {
    masm.mov(ImmWord(0), edx);
    masm.udiv(regForRhs);
  } else {
    masm.cdq();
    masm.idiv(regForRhs);
  }

  if (divOutput != Register::Invalid() && remOutput != Register::Invalid()) {
    masm.moveRegPair(eax, edx, divOutput, remOutput);
  } else {
    if (divOutput != Register::Invalid() && divOutput != eax) {
      masm.mov(eax, divOutput);
    }
    if (remOutput != Register::Invalid() && remOutput != edx) {
      masm.mov(edx, remOutput);
    }
  }

  masm.PopRegsInMask(preserve);
}

void MacroAssembler::flexibleDivMod32(Register lhs, Register rhs,
                                      Register divOutput, Register remOutput,
                                      bool isUnsigned, const LiveRegisterSet&) {
  MOZ_ASSERT(lhs != divOutput && lhs != remOutput, "lhs is preserved");
  MOZ_ASSERT(rhs != divOutput && rhs != remOutput, "rhs is preserved");

  EmitDivMod32(*this, lhs, rhs, divOutput, remOutput, isUnsigned);
}

void MacroAssembler::flexibleQuotient32(
    Register lhs, Register rhs, Register dest, bool isUnsigned,
    const LiveRegisterSet& volatileLiveRegs) {
  EmitDivMod32(*this, lhs, rhs, dest, Register::Invalid(), isUnsigned);
}

void MacroAssembler::flexibleRemainder32(
    Register lhs, Register rhs, Register dest, bool isUnsigned,
    const LiveRegisterSet& volatileLiveRegs) {
  EmitDivMod32(*this, lhs, rhs, Register::Invalid(), dest, isUnsigned);
}

// ===============================================================
// Stack manipulation functions.

// Size in bytes of the save area PushRegsInMask(set) will create: one word
// per GPR plus the (padded) FPU push size.
size_t MacroAssembler::PushRegsInMaskSizeInBytes(LiveRegisterSet set) {
  FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());
  return set.gprs().size() * sizeof(intptr_t) + fpuSet.getPushSizeInBytes();
}

void MacroAssembler::PushRegsInMask(LiveRegisterSet set) {
  mozilla::DebugOnly<size_t> framePushedInitial = framePushed();

  FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());
  unsigned numFpu = fpuSet.size();
  int32_t diffF = fpuSet.getPushSizeInBytes();
  int32_t diffG = set.gprs().size() * sizeof(intptr_t);

  // On x86, always use push to push the integer registers, as it's fast
  // on modern hardware and it's a small instruction.
  for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); ++iter) {
    diffG -= sizeof(intptr_t);
    Push(*iter);
  }
  MOZ_ASSERT(diffG == 0);
  (void)diffG;

  // FPU registers are stored into one pre-reserved area; diffF counts down
  // to place each register at its slot.
  reserveStack(diffF);
  for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) {
    FloatRegister reg = *iter;
    diffF -= reg.size();
    numFpu -= 1;
    Address spillAddress(StackPointer, diffF);
    if (reg.isDouble()) {
      storeDouble(reg, spillAddress);
    } else if (reg.isSingle()) {
      storeFloat32(reg, spillAddress);
    } else if (reg.isSimd128()) {
      storeUnalignedSimd128(reg, spillAddress);
    } else {
      MOZ_CRASH("Unknown register type.");
    }
  }
  MOZ_ASSERT(numFpu == 0);
  (void)numFpu;

  // x64 padding to keep the stack aligned on uintptr_t. Keep in sync with
  // GetPushSizeInBytes.
  size_t alignExtra = ((size_t)diffF) % sizeof(uintptr_t);
  MOZ_ASSERT_IF(sizeof(uintptr_t) == 8, alignExtra == 0 || alignExtra == 4);
  MOZ_ASSERT_IF(sizeof(uintptr_t) == 4, alignExtra == 0);
  diffF -= alignExtra;
  MOZ_ASSERT(diffF == 0);

  // The macroassembler will keep the stack sizeof(uintptr_t)-aligned, so
  // we don't need to take into account `alignExtra` here.
  MOZ_ASSERT(framePushed() - framePushedInitial ==
             PushRegsInMaskSizeInBytes(set));
}

// Like PushRegsInMask, but stores into a caller-provided area; dest.offset
// initially points one-past-the-end of the save area and is decremented as
// registers are written.
void MacroAssembler::storeRegsInMask(LiveRegisterSet set, Address dest,
                                     Register) {
  mozilla::DebugOnly<size_t> offsetInitial = dest.offset;

  FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());
  unsigned numFpu = fpuSet.size();
  int32_t diffF = fpuSet.getPushSizeInBytes();
  int32_t diffG = set.gprs().size() * sizeof(intptr_t);

  MOZ_ASSERT(dest.offset >= diffG + diffF);

  for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); ++iter) {
    diffG -= sizeof(intptr_t);
    dest.offset -= sizeof(intptr_t);
    storePtr(*iter, dest);
  }
  MOZ_ASSERT(diffG == 0);
  (void)diffG;

  for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) {
    FloatRegister reg = *iter;
    diffF -= reg.size();
    numFpu -= 1;
    dest.offset -= reg.size();
    if (reg.isDouble()) {
      storeDouble(reg, dest);
    } else if (reg.isSingle()) {
      storeFloat32(reg, dest);
    } else if (reg.isSimd128()) {
      storeUnalignedSimd128(reg, dest);
    } else {
      MOZ_CRASH("Unknown register type.");
    }
  }
  MOZ_ASSERT(numFpu == 0);
  (void)numFpu;

  // x64 padding to keep the stack aligned on uintptr_t. Keep in sync with
  // GetPushSizeInBytes.
  size_t alignExtra = ((size_t)diffF) % sizeof(uintptr_t);
  MOZ_ASSERT_IF(sizeof(uintptr_t) == 8, alignExtra == 0 || alignExtra == 4);
  MOZ_ASSERT_IF(sizeof(uintptr_t) == 4, alignExtra == 0);
  diffF -= alignExtra;
  MOZ_ASSERT(diffF == 0);

  // What this means is: if `alignExtra` is nonzero, then the save area size
  // actually used is `alignExtra` bytes smaller than what
  // PushRegsInMaskSizeInBytes claims. Hence we need to compensate for that.
  MOZ_ASSERT(alignExtra + offsetInitial - dest.offset ==
             PushRegsInMaskSizeInBytes(set));
}

// Restore the registers saved by PushRegsInMask, skipping any in `ignore`
// (those keep their current values; their stack slots are simply discarded).
void MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set,
                                         LiveRegisterSet ignore) {
  mozilla::DebugOnly<size_t> framePushedInitial = framePushed();

  FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());
  unsigned numFpu = fpuSet.size();
  int32_t diffG = set.gprs().size() * sizeof(intptr_t);
  int32_t diffF = fpuSet.getPushSizeInBytes();
  const int32_t reservedG = diffG;
  const int32_t reservedF = diffF;

  for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) {
    FloatRegister reg = *iter;
    diffF -= reg.size();
    numFpu -= 1;
    if (ignore.has(reg)) {
      continue;
    }

    Address spillAddress(StackPointer, diffF);
    if (reg.isDouble()) {
      loadDouble(spillAddress, reg);
    } else if (reg.isSingle()) {
      loadFloat32(spillAddress, reg);
    } else if (reg.isSimd128()) {
      loadUnalignedSimd128(spillAddress, reg);
    } else {
      MOZ_CRASH("Unknown register type.");
    }
  }
  freeStack(reservedF);
  MOZ_ASSERT(numFpu == 0);
  (void)numFpu;
  // x64 padding to keep the stack aligned on uintptr_t. Keep in sync with
  // GetPushBytesInSize.
  diffF -= diffF % sizeof(uintptr_t);
  MOZ_ASSERT(diffF == 0);

  // On x86, use pop to pop the integer registers, if we're not going to
  // ignore any slots, as it's fast on modern hardware and it's a small
  // instruction.
  if (ignore.emptyGeneral()) {
    for (GeneralRegisterForwardIterator iter(set.gprs()); iter.more(); ++iter) {
      diffG -= sizeof(intptr_t);
      Pop(*iter);
    }
  } else {
    for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();
         ++iter) {
      diffG -= sizeof(intptr_t);
      if (!ignore.has(*iter)) {
        loadPtr(Address(StackPointer, diffG), *iter);
      }
    }
    freeStack(reservedG);
  }
  MOZ_ASSERT(diffG == 0);

  MOZ_ASSERT(framePushedInitial - framePushed() ==
             PushRegsInMaskSizeInBytes(set));
}

// The Push/Pop overloads below emit the machine push/pop and keep the
// macroassembler's framePushed_ bookkeeping in sync via adjustFrame /
// implicitPop.

void MacroAssembler::Push(const Operand op) {
  push(op);
  adjustFrame(sizeof(intptr_t));
}

void MacroAssembler::Push(Register reg) {
  push(reg);
  adjustFrame(sizeof(intptr_t));
}

void MacroAssembler::Push(const Imm32 imm) {
  push(imm);
  adjustFrame(sizeof(intptr_t));
}

void MacroAssembler::Push(const ImmWord imm) {
  push(imm);
  adjustFrame(sizeof(intptr_t));
}

void MacroAssembler::Push(const ImmPtr imm) {
  Push(ImmWord(uintptr_t(imm.value)));
}

void MacroAssembler::Push(const ImmGCPtr ptr) {
  push(ptr);
  adjustFrame(sizeof(intptr_t));
}

void MacroAssembler::Push(FloatRegister t) {
  push(t);
  // See Assembler::push(FloatRegister) for why we use sizeof(double).
  adjustFrame(sizeof(double));
}

void MacroAssembler::PushFlags() {
  pushFlags();
  adjustFrame(sizeof(intptr_t));
}

void MacroAssembler::Pop(const Operand op) {
  pop(op);
  implicitPop(sizeof(intptr_t));
}

void MacroAssembler::Pop(Register reg) {
  pop(reg);
  implicitPop(sizeof(intptr_t));
}

void MacroAssembler::Pop(FloatRegister reg) {
  pop(reg);
  // See Assembler::pop(FloatRegister) for why we use sizeof(double).
  implicitPop(sizeof(double));
}

void MacroAssembler::Pop(const ValueOperand& val) {
  popValue(val);
  implicitPop(sizeof(Value));
}

void MacroAssembler::PopFlags() {
  popFlags();
  implicitPop(sizeof(intptr_t));
}

void MacroAssembler::PopStackPtr() { Pop(StackPointer); }

// Reset the stack pointer to `framePushed` bytes below the frame pointer,
// discarding everything pushed since that depth.
void MacroAssembler::freeStackTo(uint32_t framePushed) {
  MOZ_ASSERT(framePushed <= framePushed_);
  lea(Operand(FramePointer, -int32_t(framePushed)), StackPointer);
  framePushed_ = framePushed;
}

// ===============================================================
// Simple call functions.

CodeOffset MacroAssembler::call(Register reg) { return Assembler::call(reg); }

CodeOffset MacroAssembler::call(Label* label) { return Assembler::call(label); }

CodeOffset MacroAssembler::call(const Address& addr) {
  Assembler::call(Operand(addr.base, addr.offset));
  return CodeOffset(currentOffset());
}

// Note: clobbers eax, which is used to hold the callee address.
CodeOffset MacroAssembler::call(wasm::SymbolicAddress target) {
  mov(target, eax);
  return Assembler::call(eax);
}

void MacroAssembler::call(ImmWord target) { Assembler::call(target); }

void MacroAssembler::call(ImmPtr target) { Assembler::call(target); }

void MacroAssembler::call(JitCode* target) { Assembler::call(target); }

CodeOffset MacroAssembler::callWithPatch() {
  return Assembler::callWithPatch();
}
void MacroAssembler::patchCall(uint32_t callerOffset, uint32_t calleeOffset) {
  Assembler::patchCall(callerOffset, calleeOffset);
}

// On x86, `call` itself pushes the return address, so these are plain calls.
void MacroAssembler::callAndPushReturnAddress(Register reg) { call(reg); }

void MacroAssembler::callAndPushReturnAddress(Label* label) { call(label); }

// ===============================================================
// Patchable near/far jumps.
736 737 CodeOffset MacroAssembler::farJumpWithPatch() { 738 return Assembler::farJumpWithPatch(); 739 } 740 741 void MacroAssembler::patchFarJump(CodeOffset farJump, uint32_t targetOffset) { 742 Assembler::patchFarJump(farJump, targetOffset); 743 } 744 745 void MacroAssembler::patchFarJump(uint8_t* farJump, uint8_t* target) { 746 Assembler::patchFarJump(farJump, target); 747 } 748 749 CodeOffset MacroAssembler::nopPatchableToCall() { 750 masm.nop_five(); 751 return CodeOffset(currentOffset()); 752 } 753 754 void MacroAssembler::patchNopToCall(uint8_t* callsite, uint8_t* target) { 755 Assembler::patchFiveByteNopToCall(callsite, target); 756 } 757 758 void MacroAssembler::patchCallToNop(uint8_t* callsite) { 759 Assembler::patchCallToFiveByteNop(callsite); 760 } 761 762 CodeOffset MacroAssembler::move32WithPatch(Register dest) { 763 movl(Imm32(-1), dest); 764 return CodeOffset(currentOffset()); 765 } 766 767 void MacroAssembler::patchMove32(CodeOffset offset, Imm32 n) { 768 X86Encoding::SetInt32(masm.data() + offset.offset(), n.value); 769 } 770 771 // =============================================================== 772 // Jit Frames. 
773 774 uint32_t MacroAssembler::pushFakeReturnAddress(Register scratch) { 775 CodeLabel cl; 776 777 mov(&cl, scratch); 778 Push(scratch); 779 bind(&cl); 780 uint32_t retAddr = currentOffset(); 781 782 addCodeLabel(cl); 783 return retAddr; 784 } 785 786 // =============================================================== 787 // WebAssembly 788 789 FaultingCodeOffset MacroAssembler::wasmTrapInstruction() { 790 return FaultingCodeOffset(ud2().offset()); 791 } 792 793 void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index, 794 Register boundsCheckLimit, 795 Label* label) { 796 cmp32(index, boundsCheckLimit); 797 j(cond, label); 798 if (JitOptions.spectreIndexMasking) { 799 cmovCCl(cond, Operand(boundsCheckLimit), index); 800 } 801 } 802 803 void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index, 804 Address boundsCheckLimit, Label* label) { 805 cmp32(index, Operand(boundsCheckLimit)); 806 j(cond, label); 807 if (JitOptions.spectreIndexMasking) { 808 cmovCCl(cond, Operand(boundsCheckLimit), index); 809 } 810 } 811 812 // RAII class that generates the jumps to traps when it's destructed, to 813 // prevent some code duplication in the outOfLineWasmTruncateXtoY methods. 814 struct MOZ_RAII AutoHandleWasmTruncateToIntErrors { 815 MacroAssembler& masm; 816 Label inputIsNaN; 817 Label intOverflow; 818 const wasm::TrapSiteDesc& trapSiteDesc; 819 820 explicit AutoHandleWasmTruncateToIntErrors( 821 MacroAssembler& masm, const wasm::TrapSiteDesc& trapSiteDesc) 822 : masm(masm), trapSiteDesc(trapSiteDesc) {} 823 824 ~AutoHandleWasmTruncateToIntErrors() { 825 // Handle errors. These cases are not in arbitrary order: code will 826 // fall through to intOverflow. 
827 masm.bind(&intOverflow); 828 masm.wasmTrap(wasm::Trap::IntegerOverflow, trapSiteDesc); 829 830 masm.bind(&inputIsNaN); 831 masm.wasmTrap(wasm::Trap::InvalidConversionToInteger, trapSiteDesc); 832 } 833 }; 834 835 void MacroAssembler::wasmTruncateDoubleToInt32(FloatRegister input, 836 Register output, 837 bool isSaturating, 838 Label* oolEntry) { 839 vcvttsd2si(input, output); 840 cmp32(output, Imm32(1)); 841 j(Assembler::Overflow, oolEntry); 842 } 843 844 void MacroAssembler::wasmTruncateFloat32ToInt32(FloatRegister input, 845 Register output, 846 bool isSaturating, 847 Label* oolEntry) { 848 vcvttss2si(input, output); 849 cmp32(output, Imm32(1)); 850 j(Assembler::Overflow, oolEntry); 851 } 852 853 void MacroAssembler::oolWasmTruncateCheckF64ToI32( 854 FloatRegister input, Register output, TruncFlags flags, 855 const wasm::TrapSiteDesc& trapSiteDesc, Label* rejoin) { 856 bool isUnsigned = flags & TRUNC_UNSIGNED; 857 bool isSaturating = flags & TRUNC_SATURATING; 858 859 if (isSaturating) { 860 if (isUnsigned) { 861 // Negative overflow and NaN both are converted to 0, and the only 862 // other case is positive overflow which is converted to 863 // UINT32_MAX. 864 Label nonNegative; 865 ScratchDoubleScope fpscratch(*this); 866 loadConstantDouble(0.0, fpscratch); 867 branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, 868 &nonNegative); 869 move32(Imm32(0), output); 870 jump(rejoin); 871 872 bind(&nonNegative); 873 move32(Imm32(UINT32_MAX), output); 874 } else { 875 // Negative overflow is already saturated to INT32_MIN, so we only 876 // have to handle NaN and positive overflow here. 
877 Label notNaN; 878 branchDouble(Assembler::DoubleOrdered, input, input, ¬NaN); 879 move32(Imm32(0), output); 880 jump(rejoin); 881 882 bind(¬NaN); 883 ScratchDoubleScope fpscratch(*this); 884 loadConstantDouble(0.0, fpscratch); 885 branchDouble(Assembler::DoubleLessThan, input, fpscratch, rejoin); 886 sub32(Imm32(1), output); 887 } 888 jump(rejoin); 889 return; 890 } 891 892 AutoHandleWasmTruncateToIntErrors traps(*this, trapSiteDesc); 893 894 // Eagerly take care of NaNs. 895 branchDouble(Assembler::DoubleUnordered, input, input, &traps.inputIsNaN); 896 897 // For unsigned, fall through to intOverflow failure case. 898 if (isUnsigned) { 899 return; 900 } 901 902 // Handle special values. 903 904 // We've used vcvttsd2si. The only valid double values that can 905 // truncate to INT32_MIN are in ]INT32_MIN - 1; INT32_MIN]. 906 ScratchDoubleScope fpscratch(*this); 907 loadConstantDouble(double(INT32_MIN) - 1.0, fpscratch); 908 branchDouble(Assembler::DoubleLessThanOrEqual, input, fpscratch, 909 &traps.intOverflow); 910 911 loadConstantDouble(0.0, fpscratch); 912 branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, 913 &traps.intOverflow); 914 jump(rejoin); 915 } 916 917 void MacroAssembler::oolWasmTruncateCheckF32ToI32( 918 FloatRegister input, Register output, TruncFlags flags, 919 const wasm::TrapSiteDesc& trapSiteDesc, Label* rejoin) { 920 bool isUnsigned = flags & TRUNC_UNSIGNED; 921 bool isSaturating = flags & TRUNC_SATURATING; 922 923 if (isSaturating) { 924 if (isUnsigned) { 925 // Negative overflow and NaN both are converted to 0, and the only 926 // other case is positive overflow which is converted to 927 // UINT32_MAX. 
928 Label nonNegative; 929 ScratchFloat32Scope fpscratch(*this); 930 loadConstantFloat32(0.0f, fpscratch); 931 branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, 932 &nonNegative); 933 move32(Imm32(0), output); 934 jump(rejoin); 935 936 bind(&nonNegative); 937 move32(Imm32(UINT32_MAX), output); 938 } else { 939 // Negative overflow is already saturated to INT32_MIN, so we only 940 // have to handle NaN and positive overflow here. 941 Label notNaN; 942 branchFloat(Assembler::DoubleOrdered, input, input, ¬NaN); 943 move32(Imm32(0), output); 944 jump(rejoin); 945 946 bind(¬NaN); 947 ScratchFloat32Scope fpscratch(*this); 948 loadConstantFloat32(0.0f, fpscratch); 949 branchFloat(Assembler::DoubleLessThan, input, fpscratch, rejoin); 950 sub32(Imm32(1), output); 951 } 952 jump(rejoin); 953 return; 954 } 955 956 AutoHandleWasmTruncateToIntErrors traps(*this, trapSiteDesc); 957 958 // Eagerly take care of NaNs. 959 branchFloat(Assembler::DoubleUnordered, input, input, &traps.inputIsNaN); 960 961 // For unsigned, fall through to intOverflow failure case. 962 if (isUnsigned) { 963 return; 964 } 965 966 // Handle special values. 967 968 // We've used vcvttss2si. Check that the input wasn't 969 // float(INT32_MIN), which is the only legimitate input that 970 // would truncate to INT32_MIN. 971 ScratchFloat32Scope fpscratch(*this); 972 loadConstantFloat32(float(INT32_MIN), fpscratch); 973 branchFloat(Assembler::DoubleNotEqual, input, fpscratch, &traps.intOverflow); 974 jump(rejoin); 975 } 976 977 void MacroAssembler::oolWasmTruncateCheckF64ToI64( 978 FloatRegister input, Register64 output, TruncFlags flags, 979 const wasm::TrapSiteDesc& trapSiteDesc, Label* rejoin) { 980 bool isUnsigned = flags & TRUNC_UNSIGNED; 981 bool isSaturating = flags & TRUNC_SATURATING; 982 983 if (isSaturating) { 984 if (isUnsigned) { 985 // Negative overflow and NaN both are converted to 0, and the only 986 // other case is positive overflow which is converted to 987 // UINT64_MAX. 
988 Label positive; 989 ScratchDoubleScope fpscratch(*this); 990 loadConstantDouble(0.0, fpscratch); 991 branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, &positive); 992 move64(Imm64(0), output); 993 jump(rejoin); 994 995 bind(&positive); 996 move64(Imm64(UINT64_MAX), output); 997 } else { 998 // Negative overflow is already saturated to INT64_MIN, so we only 999 // have to handle NaN and positive overflow here. 1000 Label notNaN; 1001 branchDouble(Assembler::DoubleOrdered, input, input, ¬NaN); 1002 move64(Imm64(0), output); 1003 jump(rejoin); 1004 1005 bind(¬NaN); 1006 ScratchDoubleScope fpscratch(*this); 1007 loadConstantDouble(0.0, fpscratch); 1008 branchDouble(Assembler::DoubleLessThan, input, fpscratch, rejoin); 1009 sub64(Imm64(1), output); 1010 } 1011 jump(rejoin); 1012 return; 1013 } 1014 1015 AutoHandleWasmTruncateToIntErrors traps(*this, trapSiteDesc); 1016 1017 // Eagerly take care of NaNs. 1018 branchDouble(Assembler::DoubleUnordered, input, input, &traps.inputIsNaN); 1019 1020 // Handle special values. 1021 if (isUnsigned) { 1022 ScratchDoubleScope fpscratch(*this); 1023 loadConstantDouble(0.0, fpscratch); 1024 branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, 1025 &traps.intOverflow); 1026 loadConstantDouble(-1.0, fpscratch); 1027 branchDouble(Assembler::DoubleLessThanOrEqual, input, fpscratch, 1028 &traps.intOverflow); 1029 jump(rejoin); 1030 return; 1031 } 1032 1033 // We've used vcvtsd2sq. The only legit value whose i64 1034 // truncation is INT64_MIN is double(INT64_MIN): exponent is so 1035 // high that the highest resolution around is much more than 1. 
1036 ScratchDoubleScope fpscratch(*this); 1037 loadConstantDouble(double(int64_t(INT64_MIN)), fpscratch); 1038 branchDouble(Assembler::DoubleNotEqual, input, fpscratch, &traps.intOverflow); 1039 jump(rejoin); 1040 } 1041 1042 void MacroAssembler::oolWasmTruncateCheckF32ToI64( 1043 FloatRegister input, Register64 output, TruncFlags flags, 1044 const wasm::TrapSiteDesc& trapSiteDesc, Label* rejoin) { 1045 bool isUnsigned = flags & TRUNC_UNSIGNED; 1046 bool isSaturating = flags & TRUNC_SATURATING; 1047 1048 if (isSaturating) { 1049 if (isUnsigned) { 1050 // Negative overflow and NaN both are converted to 0, and the only 1051 // other case is positive overflow which is converted to 1052 // UINT64_MAX. 1053 Label positive; 1054 ScratchFloat32Scope fpscratch(*this); 1055 loadConstantFloat32(0.0f, fpscratch); 1056 branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, &positive); 1057 move64(Imm64(0), output); 1058 jump(rejoin); 1059 1060 bind(&positive); 1061 move64(Imm64(UINT64_MAX), output); 1062 } else { 1063 // Negative overflow is already saturated to INT64_MIN, so we only 1064 // have to handle NaN and positive overflow here. 1065 Label notNaN; 1066 branchFloat(Assembler::DoubleOrdered, input, input, ¬NaN); 1067 move64(Imm64(0), output); 1068 jump(rejoin); 1069 1070 bind(¬NaN); 1071 ScratchFloat32Scope fpscratch(*this); 1072 loadConstantFloat32(0.0f, fpscratch); 1073 branchFloat(Assembler::DoubleLessThan, input, fpscratch, rejoin); 1074 sub64(Imm64(1), output); 1075 } 1076 jump(rejoin); 1077 return; 1078 } 1079 1080 AutoHandleWasmTruncateToIntErrors traps(*this, trapSiteDesc); 1081 1082 // Eagerly take care of NaNs. 1083 branchFloat(Assembler::DoubleUnordered, input, input, &traps.inputIsNaN); 1084 1085 // Handle special values. 
1086 if (isUnsigned) { 1087 ScratchFloat32Scope fpscratch(*this); 1088 loadConstantFloat32(0.0f, fpscratch); 1089 branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, 1090 &traps.intOverflow); 1091 loadConstantFloat32(-1.0f, fpscratch); 1092 branchFloat(Assembler::DoubleLessThanOrEqual, input, fpscratch, 1093 &traps.intOverflow); 1094 jump(rejoin); 1095 return; 1096 } 1097 1098 // We've used vcvtss2sq. See comment in outOfLineWasmTruncateDoubleToInt64. 1099 ScratchFloat32Scope fpscratch(*this); 1100 loadConstantFloat32(float(int64_t(INT64_MIN)), fpscratch); 1101 branchFloat(Assembler::DoubleNotEqual, input, fpscratch, &traps.intOverflow); 1102 jump(rejoin); 1103 } 1104 1105 void MacroAssembler::enterFakeExitFrameForWasm(Register cxreg, Register scratch, 1106 ExitFrameType type) { 1107 enterFakeExitFrame(cxreg, scratch, type); 1108 } 1109 1110 CodeOffset MacroAssembler::sub32FromMemAndBranchIfNegativeWithPatch( 1111 Address address, Label* label) { 1112 // -128 is arbitrary, but makes `*address` count upwards, which may help 1113 // to identify cases where the subsequent ::patch..() call was forgotten. 1114 int numImmBytes = subl(Imm32(-128), Operand(address)); 1115 // This is vitally important for patching 1116 MOZ_RELEASE_ASSERT(numImmBytes == 1); 1117 // Points immediately after the location to patch 1118 CodeOffset patchPoint = CodeOffset(currentOffset()); 1119 jSrc(Condition::Signed, label); 1120 return patchPoint; 1121 } 1122 1123 void MacroAssembler::patchSub32FromMemAndBranchIfNegative(CodeOffset offset, 1124 Imm32 imm) { 1125 int32_t val = imm.value; 1126 // Patching it to zero would make the insn pointless 1127 MOZ_RELEASE_ASSERT(val >= 1 && val <= 127); 1128 uint8_t* ptr = (uint8_t*)masm.data() + offset.offset() - 1; 1129 MOZ_RELEASE_ASSERT(*ptr == uint8_t(-128)); // as created above 1130 *ptr = uint8_t(val) & 0x7F; 1131 } 1132 1133 // ======================================================================== 1134 // Primitive atomic operations. 
1135 1136 static void ExtendTo32(MacroAssembler& masm, Scalar::Type type, Register r) { 1137 switch (type) { 1138 case Scalar::Int8: 1139 masm.movsbl(r, r); 1140 break; 1141 case Scalar::Uint8: 1142 masm.movzbl(r, r); 1143 break; 1144 case Scalar::Int16: 1145 masm.movswl(r, r); 1146 break; 1147 case Scalar::Uint16: 1148 masm.movzwl(r, r); 1149 break; 1150 case Scalar::Int32: 1151 case Scalar::Uint32: 1152 break; 1153 default: 1154 MOZ_CRASH("unexpected type"); 1155 } 1156 } 1157 1158 #ifdef DEBUG 1159 static inline bool IsByteReg(Register r) { 1160 AllocatableGeneralRegisterSet byteRegs(Registers::SingleByteRegs); 1161 return byteRegs.has(r); 1162 } 1163 1164 static inline bool IsByteReg(Imm32 r) { 1165 // Nothing 1166 return true; 1167 } 1168 #endif 1169 1170 template <typename T> 1171 static void CompareExchange(MacroAssembler& masm, 1172 const wasm::MemoryAccessDesc* access, 1173 Scalar::Type type, const T& mem, Register oldval, 1174 Register newval, Register output) { 1175 MOZ_ASSERT(output == eax); 1176 1177 if (oldval != output) { 1178 masm.movl(oldval, output); 1179 } 1180 1181 if (access) { 1182 masm.append(*access, wasm::TrapMachineInsn::Atomic, 1183 FaultingCodeOffset(masm.currentOffset())); 1184 } 1185 1186 // NOTE: the generated code must match the assembly code in gen_cmpxchg in 1187 // GenerateAtomicOperations.py 1188 switch (Scalar::byteSize(type)) { 1189 case 1: 1190 MOZ_ASSERT(IsByteReg(newval)); 1191 masm.lock_cmpxchgb(newval, Operand(mem)); 1192 break; 1193 case 2: 1194 masm.lock_cmpxchgw(newval, Operand(mem)); 1195 break; 1196 case 4: 1197 masm.lock_cmpxchgl(newval, Operand(mem)); 1198 break; 1199 default: 1200 MOZ_CRASH("Invalid"); 1201 } 1202 1203 ExtendTo32(masm, type, output); 1204 } 1205 1206 void MacroAssembler::compareExchange(Scalar::Type type, Synchronization, 1207 const Address& mem, Register oldval, 1208 Register newval, Register output) { 1209 CompareExchange(*this, nullptr, type, mem, oldval, newval, output); 1210 } 1211 1212 void 
MacroAssembler::compareExchange(Scalar::Type type, Synchronization, 1213 const BaseIndex& mem, Register oldval, 1214 Register newval, Register output) { 1215 CompareExchange(*this, nullptr, type, mem, oldval, newval, output); 1216 } 1217 1218 void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access, 1219 const Address& mem, Register oldval, 1220 Register newval, Register output) { 1221 CompareExchange(*this, &access, access.type(), mem, oldval, newval, output); 1222 } 1223 1224 void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access, 1225 const BaseIndex& mem, Register oldval, 1226 Register newval, Register output) { 1227 CompareExchange(*this, &access, access.type(), mem, oldval, newval, output); 1228 } 1229 1230 template <typename T> 1231 static void AtomicExchange(MacroAssembler& masm, 1232 const wasm::MemoryAccessDesc* access, 1233 Scalar::Type type, const T& mem, Register value, 1234 Register output) 1235 // NOTE: the generated code must match the assembly code in gen_exchange in 1236 // GenerateAtomicOperations.py 1237 { 1238 if (value != output) { 1239 masm.movl(value, output); 1240 } 1241 1242 if (access) { 1243 masm.append(*access, wasm::TrapMachineInsn::Atomic, 1244 FaultingCodeOffset(masm.currentOffset())); 1245 } 1246 1247 switch (Scalar::byteSize(type)) { 1248 case 1: 1249 MOZ_ASSERT(IsByteReg(output)); 1250 masm.xchgb(output, Operand(mem)); 1251 break; 1252 case 2: 1253 masm.xchgw(output, Operand(mem)); 1254 break; 1255 case 4: 1256 masm.xchgl(output, Operand(mem)); 1257 break; 1258 default: 1259 MOZ_CRASH("Invalid"); 1260 } 1261 ExtendTo32(masm, type, output); 1262 } 1263 1264 void MacroAssembler::atomicExchange(Scalar::Type type, Synchronization, 1265 const Address& mem, Register value, 1266 Register output) { 1267 AtomicExchange(*this, nullptr, type, mem, value, output); 1268 } 1269 1270 void MacroAssembler::atomicExchange(Scalar::Type type, Synchronization, 1271 const BaseIndex& mem, Register value, 1272 
Register output) { 1273 AtomicExchange(*this, nullptr, type, mem, value, output); 1274 } 1275 1276 void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access, 1277 const Address& mem, Register value, 1278 Register output) { 1279 AtomicExchange(*this, &access, access.type(), mem, value, output); 1280 } 1281 1282 void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access, 1283 const BaseIndex& mem, Register value, 1284 Register output) { 1285 AtomicExchange(*this, &access, access.type(), mem, value, output); 1286 } 1287 1288 static void SetupValue(MacroAssembler& masm, AtomicOp op, Imm32 src, 1289 Register output) { 1290 if (op == AtomicOp::Sub) { 1291 masm.movl(Imm32(-src.value), output); 1292 } else { 1293 masm.movl(src, output); 1294 } 1295 } 1296 1297 static void SetupValue(MacroAssembler& masm, AtomicOp op, Register src, 1298 Register output) { 1299 if (src != output) { 1300 masm.movl(src, output); 1301 } 1302 if (op == AtomicOp::Sub) { 1303 masm.negl(output); 1304 } 1305 } 1306 1307 static auto WasmTrapMachineInsn(Scalar::Type arrayType, AtomicOp op) { 1308 switch (op) { 1309 case AtomicOp::Add: 1310 case AtomicOp::Sub: 1311 return wasm::TrapMachineInsn::Atomic; 1312 case AtomicOp::And: 1313 case AtomicOp::Or: 1314 case AtomicOp::Xor: 1315 switch (arrayType) { 1316 case Scalar::Int8: 1317 case Scalar::Uint8: 1318 return wasm::TrapMachineInsn::Load8; 1319 case Scalar::Int16: 1320 case Scalar::Uint16: 1321 return wasm::TrapMachineInsn::Load16; 1322 case Scalar::Int32: 1323 case Scalar::Uint32: 1324 return wasm::TrapMachineInsn::Load32; 1325 default: 1326 break; 1327 } 1328 [[fallthrough]]; 1329 default: 1330 break; 1331 } 1332 MOZ_CRASH(); 1333 } 1334 1335 template <typename T, typename V> 1336 static void AtomicFetchOp(MacroAssembler& masm, 1337 const wasm::MemoryAccessDesc* access, 1338 Scalar::Type arrayType, AtomicOp op, V value, 1339 const T& mem, Register temp, Register output) { 1340 // Note value can be an Imm or a 
  // Register.

  // NOTE: the generated code must match the assembly code in gen_fetchop in
  // GenerateAtomicOperations.py

  // Setup the output register.
  switch (op) {
    case AtomicOp::Add:
    case AtomicOp::Sub:
      MOZ_ASSERT(temp == InvalidReg);
      MOZ_ASSERT_IF(Scalar::byteSize(arrayType) == 1,
                    IsByteReg(output) && IsByteReg(value));

      SetupValue(masm, op, value, output);
      break;
    case AtomicOp::And:
    case AtomicOp::Or:
    case AtomicOp::Xor:
      MOZ_ASSERT(output != temp && output == eax);
      MOZ_ASSERT_IF(Scalar::byteSize(arrayType) == 1,
                    IsByteReg(output) && IsByteReg(temp));

      // Bitwise operations don't require any additional setup.
      break;
    default:
      MOZ_CRASH();
  }

  // LOCK XADD of `output` into memory, at the element width.
  auto lock_xadd = [&]() {
    switch (arrayType) {
      case Scalar::Int8:
      case Scalar::Uint8:
        masm.lock_xaddb(output, Operand(mem));
        break;
      case Scalar::Int16:
      case Scalar::Uint16:
        masm.lock_xaddw(output, Operand(mem));
        break;
      case Scalar::Int32:
      case Scalar::Uint32:
        masm.lock_xaddl(output, Operand(mem));
        break;
      default:
        MOZ_CRASH();
    }
  };

  // Zero-extending load of the element into eax.
  auto load = [&]() {
    switch (arrayType) {
      case Scalar::Int8:
      case Scalar::Uint8:
        masm.movzbl(Operand(mem), eax);
        break;
      case Scalar::Int16:
      case Scalar::Uint16:
        masm.movzwl(Operand(mem), eax);
        break;
      case Scalar::Int32:
      case Scalar::Uint32:
        masm.movl(Operand(mem), eax);
        break;
      default:
        MOZ_CRASH();
    }
  };

  // temp = temp <op> value, for the bitwise operations.
  auto bitwiseOp = [&]() {
    switch (op) {
      case AtomicOp::And:
        masm.andl(value, temp);
        break;
      case AtomicOp::Or:
        masm.orl(value, temp);
        break;
      case AtomicOp::Xor:
        masm.xorl(value, temp);
        break;
      default:
        MOZ_CRASH();
    }
  };

  // LOCK CMPXCHG of `temp` into memory, comparing against eax.
  auto lock_cmpxchg = [&]() {
    switch (arrayType) {
      case Scalar::Int8:
      case Scalar::Uint8:
        masm.lock_cmpxchgb(temp, Operand(mem));
        break;
      case Scalar::Int16:
      case Scalar::Uint16:
        masm.lock_cmpxchgw(temp, Operand(mem));
        break;
      case Scalar::Int32:
      case Scalar::Uint32:
        masm.lock_cmpxchgl(temp, Operand(mem));
        break;
      default:
        MOZ_CRASH();
    }
  };

  // Add trap instruction directly before the load.
  if (access) {
    masm.append(*access, WasmTrapMachineInsn(arrayType, op),
                FaultingCodeOffset(masm.currentOffset()));
  }

  switch (op) {
    case AtomicOp::Add:
    case AtomicOp::Sub:
      // `add` and `sub` operations can be optimized with XADD.
      lock_xadd();

      ExtendTo32(masm, arrayType, output);
      break;

    case AtomicOp::And:
    case AtomicOp::Or:
    case AtomicOp::Xor: {
      // Bitwise operations need a CAS loop.

      // Load memory into eax.
      load();

      // Loop.
      Label again;
      masm.bind(&again);
      masm.movl(eax, temp);

      // temp = temp <op> value.
      bitwiseOp();

      // Compare and swap `temp` with memory.
      lock_cmpxchg();

      // Repeat if the comparison failed.
      masm.j(MacroAssembler::NonZero, &again);

      // Sign-extend the zero-extended load.
      if (Scalar::isSignedIntType(arrayType)) {
        ExtendTo32(masm, arrayType, eax);
      }
      break;
    }

    default:
      MOZ_CRASH();
  }
}

void MacroAssembler::atomicFetchOp(Scalar::Type arrayType, Synchronization,
                                   AtomicOp op, Register value,
                                   const BaseIndex& mem, Register temp,
                                   Register output) {
  AtomicFetchOp(*this, nullptr, arrayType, op, value, mem, temp, output);
}

void MacroAssembler::atomicFetchOp(Scalar::Type arrayType, Synchronization,
                                   AtomicOp op, Register value,
                                   const Address& mem, Register temp,
                                   Register output) {
  AtomicFetchOp(*this, nullptr, arrayType, op, value, mem, temp, output);
}

void MacroAssembler::atomicFetchOp(Scalar::Type arrayType, Synchronization,
                                   AtomicOp op, Imm32 value,
                                   const BaseIndex& mem, Register temp,
                                   Register output) {
  AtomicFetchOp(*this, nullptr, arrayType, op, value, mem, temp, output);
}

void MacroAssembler::atomicFetchOp(Scalar::Type arrayType, Synchronization,
                                   AtomicOp op, Imm32 value, const Address& mem,
                                   Register temp, Register output) {
  AtomicFetchOp(*this, nullptr, arrayType, op, value, mem, temp, output);
}

void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access,
                                       AtomicOp op, Register value,
                                       const Address& mem, Register temp,
                                       Register output) {
  AtomicFetchOp(*this, &access, access.type(), op, value, mem, temp, output);
}

void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access,
                                       AtomicOp op, Imm32 value,
                                       const Address& mem, Register temp,
                                       Register output) {
  AtomicFetchOp(*this, &access, access.type(), op, value, mem, temp, output);
}

void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access,
                                       AtomicOp op, Register value,
                                       const BaseIndex& mem, Register temp,
                                       Register output) {
  AtomicFetchOp(*this, &access,
                access.type(), op, value, mem, temp, output);
}

void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access,
                                       AtomicOp op, Imm32 value,
                                       const BaseIndex& mem, Register temp,
                                       Register output) {
  AtomicFetchOp(*this, &access, access.type(), op, value, mem, temp, output);
}

// Emit an atomic read-modify-write of `mem` whose result value is not
// needed; this permits a single LOCK-prefixed ALU instruction instead of
// XADD or a CAS loop. When `access` is non-null, the instruction is
// recorded for wasm trap handling.
template <typename T, typename V>
static void AtomicEffectOp(MacroAssembler& masm,
                           const wasm::MemoryAccessDesc* access,
                           Scalar::Type arrayType, AtomicOp op, V value,
                           const T& mem) {
  if (access) {
    masm.append(*access, wasm::TrapMachineInsn::Atomic,
                FaultingCodeOffset(masm.currentOffset()));
  }

  switch (Scalar::byteSize(arrayType)) {
    case 1:
      switch (op) {
        case AtomicOp::Add:
          masm.lock_addb(value, Operand(mem));
          break;
        case AtomicOp::Sub:
          masm.lock_subb(value, Operand(mem));
          break;
        case AtomicOp::And:
          masm.lock_andb(value, Operand(mem));
          break;
        case AtomicOp::Or:
          masm.lock_orb(value, Operand(mem));
          break;
        case AtomicOp::Xor:
          masm.lock_xorb(value, Operand(mem));
          break;
        default:
          MOZ_CRASH();
      }
      break;
    case 2:
      switch (op) {
        case AtomicOp::Add:
          masm.lock_addw(value, Operand(mem));
          break;
        case AtomicOp::Sub:
          masm.lock_subw(value, Operand(mem));
          break;
        case AtomicOp::And:
          masm.lock_andw(value, Operand(mem));
          break;
        case AtomicOp::Or:
          masm.lock_orw(value, Operand(mem));
          break;
        case AtomicOp::Xor:
          masm.lock_xorw(value, Operand(mem));
          break;
        default:
          MOZ_CRASH();
      }
      break;
    case 4:
      switch (op) {
        case AtomicOp::Add:
          masm.lock_addl(value, Operand(mem));
          break;
        case AtomicOp::Sub:
          masm.lock_subl(value, Operand(mem));
          break;
        case AtomicOp::And:
          masm.lock_andl(value, Operand(mem));
          break;
        case AtomicOp::Or:
          masm.lock_orl(value, Operand(mem));
          break;
        case AtomicOp::Xor:
          masm.lock_xorl(value, Operand(mem));
          break;
        default:
          MOZ_CRASH();
      }
      break;
    default:
      MOZ_CRASH();
  }
}

void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access,
                                        AtomicOp op, Register value,
                                        const Address& mem, Register temp) {
  MOZ_ASSERT(temp == InvalidReg);
  AtomicEffectOp(*this, &access, access.type(), op, value, mem);
}

void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access,
                                        AtomicOp op, Imm32 value,
                                        const Address& mem, Register temp) {
  MOZ_ASSERT(temp == InvalidReg);
  AtomicEffectOp(*this, &access, access.type(), op, value, mem);
}

void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access,
                                        AtomicOp op, Register value,
                                        const BaseIndex& mem, Register temp) {
  MOZ_ASSERT(temp == InvalidReg);
  AtomicEffectOp(*this, &access, access.type(), op, value, mem);
}

void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access,
                                        AtomicOp op, Imm32 value,
                                        const BaseIndex& mem, Register temp) {
  MOZ_ASSERT(temp == InvalidReg);
  AtomicEffectOp(*this, &access, access.type(), op, value, mem);
}

// ========================================================================
// JS atomic operations.

// Compare-exchange for JS typed arrays. Uint32 results may not fit in an
// int32, so they are produced in `temp` and converted to a double in
// `output.fpu()`; all other element types return the value in `output.gpr()`.
template <typename T>
static void CompareExchangeJS(MacroAssembler& masm, Scalar::Type arrayType,
                              Synchronization sync, const T& mem,
                              Register oldval, Register newval, Register temp,
                              AnyRegister output) {
  if (arrayType == Scalar::Uint32) {
    masm.compareExchange(arrayType, sync, mem, oldval, newval, temp);
    masm.convertUInt32ToDouble(temp, output.fpu());
  } else {
    masm.compareExchange(arrayType, sync, mem, oldval, newval, output.gpr());
  }
}

void MacroAssembler::compareExchangeJS(Scalar::Type arrayType,
                                       Synchronization sync, const Address& mem,
                                       Register oldval, Register newval,
                                       Register temp, AnyRegister output) {
  CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output);
}

void MacroAssembler::compareExchangeJS(Scalar::Type arrayType,
                                       Synchronization sync,
                                       const BaseIndex& mem, Register oldval,
                                       Register newval, Register temp,
                                       AnyRegister output) {
  CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output);
}

// Atomic exchange for JS typed arrays; same Uint32-as-double convention as
// CompareExchangeJS above.
template <typename T>
static void AtomicExchangeJS(MacroAssembler& masm, Scalar::Type arrayType,
                             Synchronization sync, const T& mem, Register value,
                             Register temp, AnyRegister output) {
  if (arrayType == Scalar::Uint32) {
    masm.atomicExchange(arrayType, sync, mem, value, temp);
    masm.convertUInt32ToDouble(temp, output.fpu());
  } else {
    masm.atomicExchange(arrayType, sync, mem, value, output.gpr());
  }
}

void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType,
                                      Synchronization sync, const Address& mem,
                                      Register value, Register temp,
                                      AnyRegister output) {
  AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output);
}

void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType,
                                      Synchronization sync,
                                      const BaseIndex& mem, Register value,
                                      Register temp, AnyRegister output) {
  AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output);
}

// Atomic fetch-op for JS typed arrays; same Uint32-as-double convention as
// CompareExchangeJS above.
template <typename T>
static void AtomicFetchOpJS(MacroAssembler& masm, Scalar::Type arrayType,
                            Synchronization sync, AtomicOp op, Register value,
                            const T& mem, Register temp1, Register temp2,
                            AnyRegister output) {
  if (arrayType == Scalar::Uint32) {
    masm.atomicFetchOp(arrayType, sync, op, value, mem, temp2, temp1);
    masm.convertUInt32ToDouble(temp1, output.fpu());
  } else {
    masm.atomicFetchOp(arrayType, sync, op, value, mem, temp1, output.gpr());
  }
}

void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType,
                                     Synchronization sync, AtomicOp op,
                                     Register value, const Address& mem,
                                     Register temp1, Register temp2,
                                     AnyRegister output) {
  AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output);
}

void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType,
                                     Synchronization sync, AtomicOp op,
                                     Register value, const BaseIndex& mem,
                                     Register temp1, Register temp2,
                                     AnyRegister output) {
  AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output);
}

void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType, Synchronization,
                                      AtomicOp op, Register value,
                                      const BaseIndex& mem, Register temp) {
  MOZ_ASSERT(temp == InvalidReg);
  AtomicEffectOp(*this, nullptr, arrayType, op, value, mem);
}

void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType, Synchronization,
                                      AtomicOp op, Register value,
                                      const Address& mem, Register temp) {
  MOZ_ASSERT(temp == InvalidReg);
  AtomicEffectOp(*this, nullptr, arrayType, op, value, mem);
}

void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType, Synchronization,
                                      AtomicOp op, Imm32 value,
                                      const Address& mem, Register temp) {
  MOZ_ASSERT(temp == InvalidReg);
  AtomicEffectOp(*this, nullptr, arrayType, op, value, mem);
}

void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType,
                                      Synchronization sync, AtomicOp op,
                                      Imm32 value, const BaseIndex& mem,
                                      Register temp) {
  MOZ_ASSERT(temp == InvalidReg);
  AtomicEffectOp(*this, nullptr, arrayType, op, value, mem);
}

// Imm32 variant of AtomicFetchOpJS above.
template <typename T>
static void AtomicFetchOpJS(MacroAssembler& masm, Scalar::Type arrayType,
                            Synchronization sync, AtomicOp op, Imm32 value,
                            const T& mem, Register temp1, Register temp2,
                            AnyRegister output) {
  if (arrayType == Scalar::Uint32) {
    masm.atomicFetchOp(arrayType, sync, op, value, mem, temp2, temp1);
    masm.convertUInt32ToDouble(temp1, output.fpu());
  } else {
    masm.atomicFetchOp(arrayType, sync, op, value, mem, temp1, output.gpr());
  }
}

void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType,
                                     Synchronization sync, AtomicOp op,
                                     Imm32 value, const Address& mem,
                                     Register temp1, Register temp2,
                                     AnyRegister output) {
  AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output);
}

void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType,
                                     Synchronization sync, AtomicOp op,
                                     Imm32 value, const BaseIndex& mem,
                                     Register temp1, Register temp2,
                                     AnyRegister output) {
  AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output);
}

void MacroAssembler::atomicPause() { masm.pause(); }

// ========================================================================
// Spectre Mitigations.

void MacroAssembler::speculationBarrier() {
  // Spectre mitigation recommended by Intel and AMD suggest to use lfence as
  // a way to force all speculative execution of instructions to end.
  MOZ_ASSERT(HasSSE2());
  masm.lfence();
}

// Compute floor(src) as an int32 in `dest`, jumping to `fail` for inputs
// whose floor is not representable as an int32 (including -0).
void MacroAssembler::floorFloat32ToInt32(FloatRegister src, Register dest,
                                         Label* fail) {
  if (HasSSE41()) {
    // Fail on negative-zero.
    branchNegativeZeroFloat32(src, dest, fail);

    // Round toward -Infinity.
    {
      ScratchFloat32Scope scratch(*this);
      vroundss(X86Encoding::RoundDown, src, scratch);
      truncateFloat32ToInt32(scratch, dest, fail);
    }
  } else {
    Label negative, end;

    // Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
    {
      ScratchFloat32Scope scratch(*this);
      zeroFloat32(scratch);
      branchFloat(Assembler::DoubleLessThan, src, scratch, &negative);
    }

    // Fail on negative-zero.
    branchNegativeZeroFloat32(src, dest, fail);

    // Input is non-negative, so truncation correctly rounds.
    truncateFloat32ToInt32(src, dest, fail);
    jump(&end);

    // Input is negative, but isn't -0.
    // Negative values go on a comparatively expensive path, since no
    // native rounding mode matches JS semantics. Still better than callVM.
    bind(&negative);
    {
      // Truncate and round toward zero.
      // This is off-by-one for everything but integer-valued inputs.
      //
      // Directly call vcvttss2si instead of truncateFloat32ToInt32 because we
      // want to perform failure handling ourselves.
      vcvttss2si(src, dest);

      // Test whether the input double was integer-valued.
      {
        ScratchFloat32Scope scratch(*this);
        convertInt32ToFloat32(dest, scratch);
        branchFloat(Assembler::DoubleEqualOrUnordered, src, scratch, &end);
      }

      // Input is not integer-valued, so we rounded off-by-one in the
      // wrong direction. Correct by subtraction.
      //
      // Overflows if vcvttss2si returned the failure return value INT_MIN.
      branchSub32(Assembler::Overflow, Imm32(1), dest, fail);
    }

    bind(&end);
  }
}

// Double version of floorFloat32ToInt32 above.
void MacroAssembler::floorDoubleToInt32(FloatRegister src, Register dest,
                                        Label* fail) {
  if (HasSSE41()) {
    // Fail on negative-zero.
    branchNegativeZero(src, dest, fail);

    // Round toward -Infinity.
    {
      ScratchDoubleScope scratch(*this);
      vroundsd(X86Encoding::RoundDown, src, scratch);
      truncateDoubleToInt32(scratch, dest, fail);
    }
  } else {
    Label negative, end;

    // Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
    {
      ScratchDoubleScope scratch(*this);
      zeroDouble(scratch);
      branchDouble(Assembler::DoubleLessThan, src, scratch, &negative);
    }

    // Fail on negative-zero.
    branchNegativeZero(src, dest, fail);

    // Input is non-negative, so truncation correctly rounds.
    truncateDoubleToInt32(src, dest, fail);
    jump(&end);

    // Input is negative, but isn't -0.
    // Negative values go on a comparatively expensive path, since no
    // native rounding mode matches JS semantics. Still better than callVM.
    bind(&negative);
    {
      // Truncate and round toward zero.
      // This is off-by-one for everything but integer-valued inputs.
      //
      // Directly call vcvttsd2si instead of truncateDoubleToInt32 because we
      // want to perform failure handling ourselves.
      vcvttsd2si(src, dest);

      // Test whether the input double was integer-valued.
      {
        ScratchDoubleScope scratch(*this);
        convertInt32ToDouble(dest, scratch);
        branchDouble(Assembler::DoubleEqualOrUnordered, src, scratch, &end);
      }

      // Input is not integer-valued, so we rounded off-by-one in the
      // wrong direction. Correct by subtraction.
      //
      // Overflows if vcvttsd2si returned the failure return value INT_MIN.
      branchSub32(Assembler::Overflow, Imm32(1), dest, fail);
    }

    bind(&end);
  }
}

// Compute ceil(src) as an int32 in `dest`, jumping to `fail` for inputs
// whose ceiling is not representable as an int32 (including inputs in
// ]-1, -0], whose ceiling is -0).
void MacroAssembler::ceilFloat32ToInt32(FloatRegister src, Register dest,
                                        Label* fail) {
  ScratchFloat32Scope scratch(*this);

  Label lessThanOrEqualMinusOne;

  // If x is in ]-1,0], ceil(x) is -0, which cannot be represented as an int32.
  // Fail if x > -1 and the sign bit is set.
  loadConstantFloat32(-1.f, scratch);
  branchFloat(Assembler::DoubleLessThanOrEqualOrUnordered, src, scratch,
              &lessThanOrEqualMinusOne);
  vmovmskps(src, dest);
  branchTest32(Assembler::NonZero, dest, Imm32(1), fail);

  if (HasSSE41()) {
    // x <= -1 or x > -0
    bind(&lessThanOrEqualMinusOne);
    // Round toward +Infinity.
    vroundss(X86Encoding::RoundUp, src, scratch);
    truncateFloat32ToInt32(scratch, dest, fail);
    return;
  }

  // No SSE4.1
  Label end;

  // x >= 0 and x is not -0.0. We can truncate integer values, and truncate and
  // add 1 to non-integer values. This will also work for values >= INT_MAX + 1,
  // as the truncate operation will return INT_MIN and we'll fail.
  truncateFloat32ToInt32(src, dest, fail);
  convertInt32ToFloat32(dest, scratch);
  branchFloat(Assembler::DoubleEqualOrUnordered, src, scratch, &end);

  // Input is not integer-valued, add 1 to obtain the ceiling value.
  // If input > INT_MAX, output == INT_MAX so adding 1 will overflow.
  branchAdd32(Assembler::Overflow, Imm32(1), dest, fail);
  jump(&end);

  // x <= -1, truncation is the way to go.
  bind(&lessThanOrEqualMinusOne);
  truncateFloat32ToInt32(src, dest, fail);

  bind(&end);
}

// Double version of ceilFloat32ToInt32 above.
void MacroAssembler::ceilDoubleToInt32(FloatRegister src, Register dest,
                                       Label* fail) {
  ScratchDoubleScope scratch(*this);

  Label lessThanOrEqualMinusOne;

  // If x is in ]-1,0], ceil(x) is -0, which cannot be represented as an int32.
  // Fail if x > -1 and the sign bit is set.
  loadConstantDouble(-1.0, scratch);
  branchDouble(Assembler::DoubleLessThanOrEqualOrUnordered, src, scratch,
               &lessThanOrEqualMinusOne);
  vmovmskpd(src, dest);
  branchTest32(Assembler::NonZero, dest, Imm32(1), fail);

  if (HasSSE41()) {
    // x <= -1 or x > -0
    bind(&lessThanOrEqualMinusOne);
    // Round toward +Infinity.
    vroundsd(X86Encoding::RoundUp, src, scratch);
    truncateDoubleToInt32(scratch, dest, fail);
    return;
  }

  // No SSE4.1
  Label end;

  // x >= 0 and x is not -0.0. We can truncate integer values, and truncate and
  // add 1 to non-integer values. This will also work for values >= INT_MAX + 1,
  // as the truncate operation will return INT_MIN and we'll fail.
  truncateDoubleToInt32(src, dest, fail);
  convertInt32ToDouble(dest, scratch);
  branchDouble(Assembler::DoubleEqualOrUnordered, src, scratch, &end);

  // Input is not integer-valued, add 1 to obtain the ceiling value.
  // If input > INT_MAX, output == INT_MAX so adding 1 will overflow.
  branchAdd32(Assembler::Overflow, Imm32(1), dest, fail);
  jump(&end);

  // x <= -1, truncation is the way to go.
  bind(&lessThanOrEqualMinusOne);
  truncateDoubleToInt32(src, dest, fail);

  bind(&end);
}

// Compute trunc(src) as an int32 in `dest`, jumping to `fail` for inputs
// whose truncation is not representable as an int32 (including inputs in
// ]-1, -0], which truncate to -0).
void MacroAssembler::truncDoubleToInt32(FloatRegister src, Register dest,
                                        Label* fail) {
  Label lessThanOrEqualMinusOne;

  // Bail on ]-1; -0] range
  {
    ScratchDoubleScope scratch(*this);
    loadConstantDouble(-1, scratch);
    branchDouble(Assembler::DoubleLessThanOrEqualOrUnordered, src, scratch,
                 &lessThanOrEqualMinusOne);
  }

  // Test for remaining values with the sign bit set, i.e. ]-1; -0]
  vmovmskpd(src, dest);
  branchTest32(Assembler::NonZero, dest, Imm32(1), fail);

  // x <= -1 or x >= +0, truncation is the way to go.
  bind(&lessThanOrEqualMinusOne);
  truncateDoubleToInt32(src, dest, fail);
}

// Float32 version of truncDoubleToInt32 above.
void MacroAssembler::truncFloat32ToInt32(FloatRegister src, Register dest,
                                         Label* fail) {
  Label lessThanOrEqualMinusOne;

  // Bail on ]-1; -0] range
  {
    ScratchFloat32Scope scratch(*this);
    loadConstantFloat32(-1.f, scratch);
    branchFloat(Assembler::DoubleLessThanOrEqualOrUnordered, src, scratch,
                &lessThanOrEqualMinusOne);
  }

  // Test for remaining values with the sign bit set, i.e. ]-1; -0]
  vmovmskps(src, dest);
  branchTest32(Assembler::NonZero, dest, Imm32(1), fail);

  // x <= -1 or x >= +0, truncation is the way to go.
  bind(&lessThanOrEqualMinusOne);
  truncateFloat32ToInt32(src, dest, fail);
}

// Compute JS-style round(src) as an int32 in `dest`, jumping to `fail` when
// the result is not representable as an int32 (including -0, produced by
// inputs in [-0.5, -0]). Clobbers `temp`.
void MacroAssembler::roundFloat32ToInt32(FloatRegister src, Register dest,
                                         FloatRegister temp, Label* fail) {
  ScratchFloat32Scope scratch(*this);

  Label negativeOrZero, negative, end;

  // Branch to a slow path for non-positive inputs. Doesn't catch NaN.
  zeroFloat32(scratch);
  loadConstantFloat32(GetBiggestNumberLessThan(0.5f), temp);
  branchFloat(Assembler::DoubleLessThanOrEqual, src, scratch, &negativeOrZero);
  {
    // Input is strictly positive or NaN. Add the biggest float less than 0.5
    // and truncate, rounding down (because if the input is the biggest float
    // less than 0.5, adding 0.5 would undesirably round up to 1). Note that we
    // have to add the input to the temp register because we're not allowed to
    // modify the input register.
    addFloat32(src, temp);
    truncateFloat32ToInt32(temp, dest, fail);
    jump(&end);
  }

  // Input is negative, +0 or -0.
  bind(&negativeOrZero);
  {
    // Branch on negative input.
    j(Assembler::NotEqual, &negative);

    // Fail on negative-zero.
    branchNegativeZeroFloat32(src, dest, fail);

    // Input is +0.
    xor32(dest, dest);
    jump(&end);
  }

  // Input is negative.
  bind(&negative);
  {
    // Inputs in [-0.5, 0) are rounded to -0. Fail.
    loadConstantFloat32(-0.5f, scratch);
    branchFloat(Assembler::DoubleGreaterThanOrEqual, src, scratch, fail);

    // Other negative inputs need the biggest float less than 0.5 added.
    //
    // The result is stored in the temp register (currently contains the biggest
    // float less than 0.5).
    addFloat32(src, temp);

    if (HasSSE41()) {
      // Round toward -Infinity.
      vroundss(X86Encoding::RoundDown, temp, scratch);

      // Truncate.
      truncateFloat32ToInt32(scratch, dest, fail);
    } else {
      // Round toward -Infinity without the benefit of ROUNDSS.

      // Truncate and round toward zero.
      // This is off-by-one for everything but integer-valued inputs.
      //
      // Directly call vcvttss2si instead of truncateFloat32ToInt32 because we
      // want to perform failure handling ourselves.
      vcvttss2si(temp, dest);

      // Test whether the truncated float was integer-valued.
      convertInt32ToFloat32(dest, scratch);
      branchFloat(Assembler::DoubleEqualOrUnordered, temp, scratch, &end);

      // Input is not integer-valued, so we rounded off-by-one in the
      // wrong direction. Correct by subtraction.
      //
      // Overflows if vcvttss2si returned the failure return value INT_MIN.
      branchSub32(Assembler::Overflow, Imm32(1), dest, fail);
    }
  }

  bind(&end);
}

// Double version of roundFloat32ToInt32 above.
void MacroAssembler::roundDoubleToInt32(FloatRegister src, Register dest,
                                        FloatRegister temp, Label* fail) {
  ScratchDoubleScope scratch(*this);

  Label negativeOrZero, negative, end;

  // Branch to a slow path for non-positive inputs. Doesn't catch NaN.
  zeroDouble(scratch);
  loadConstantDouble(GetBiggestNumberLessThan(0.5), temp);
  branchDouble(Assembler::DoubleLessThanOrEqual, src, scratch, &negativeOrZero);
  {
    // Input is strictly positive or NaN. Add the biggest double less than 0.5
    // and truncate, rounding down (because if the input is the biggest double
    // less than 0.5, adding 0.5 would undesirably round up to 1). Note that we
    // have to add the input to the temp register because we're not allowed to
    // modify the input register.
    addDouble(src, temp);
    truncateDoubleToInt32(temp, dest, fail);
    jump(&end);
  }

  // Input is negative, +0 or -0.
  bind(&negativeOrZero);
  {
    // Branch on negative input.
    j(Assembler::NotEqual, &negative);

    // Fail on negative-zero.
    branchNegativeZero(src, dest, fail, /* maybeNonZero = */ false);

    // Input is +0
    xor32(dest, dest);
    jump(&end);
  }

  // Input is negative.
  bind(&negative);
  {
    // Inputs in [-0.5, 0) are rounded to -0. Fail.
2176 loadConstantDouble(-0.5, scratch); 2177 branchDouble(Assembler::DoubleGreaterThanOrEqual, src, scratch, fail); 2178 2179 // Other negative inputs need the biggest double less than 0.5 added. 2180 // 2181 // The result is stored in the temp register (currently contains the biggest 2182 // double less than 0.5). 2183 addDouble(src, temp); 2184 2185 if (HasSSE41()) { 2186 // Round toward -Infinity. 2187 vroundsd(X86Encoding::RoundDown, temp, scratch); 2188 2189 // Truncate. 2190 truncateDoubleToInt32(scratch, dest, fail); 2191 } else { 2192 // Round toward -Infinity without the benefit of ROUNDSD. 2193 2194 // Truncate and round toward zero. 2195 // This is off-by-one for everything but integer-valued inputs. 2196 // 2197 // Directly call vcvttsd2si instead of truncateDoubleToInt32 because we 2198 // want to perform failure handling ourselves. 2199 vcvttsd2si(temp, dest); 2200 2201 // Test whether the truncated double was integer-valued. 2202 convertInt32ToDouble(dest, scratch); 2203 branchDouble(Assembler::DoubleEqualOrUnordered, temp, scratch, &end); 2204 2205 // Input is not integer-valued, so we rounded off-by-one in the 2206 // wrong direction. Correct by subtraction. 2207 // 2208 // Overflows if vcvttsd2si returned the failure return value INT_MIN. 
2209 branchSub32(Assembler::Overflow, Imm32(1), dest, fail); 2210 } 2211 } 2212 2213 bind(&end); 2214 } 2215 2216 void MacroAssembler::nearbyIntDouble(RoundingMode mode, FloatRegister src, 2217 FloatRegister dest) { 2218 MOZ_ASSERT(HasRoundInstruction(mode)); 2219 vroundsd(Assembler::ToX86RoundingMode(mode), src, dest); 2220 } 2221 2222 void MacroAssembler::nearbyIntFloat32(RoundingMode mode, FloatRegister src, 2223 FloatRegister dest) { 2224 MOZ_ASSERT(HasRoundInstruction(mode)); 2225 vroundss(Assembler::ToX86RoundingMode(mode), src, dest); 2226 } 2227 2228 void MacroAssembler::copySignDouble(FloatRegister lhs, FloatRegister rhs, 2229 FloatRegister output) { 2230 ScratchDoubleScope scratch(*this); 2231 2232 double keepSignMask = mozilla::BitwiseCast<double>(INT64_MIN); 2233 double clearSignMask = mozilla::BitwiseCast<double>(INT64_MAX); 2234 2235 if (HasAVX()) { 2236 if (rhs == output) { 2237 MOZ_ASSERT(lhs != rhs); 2238 vandpdSimd128(SimdConstant::SplatX2(keepSignMask), rhs, output); 2239 vandpdSimd128(SimdConstant::SplatX2(clearSignMask), lhs, scratch); 2240 } else { 2241 vandpdSimd128(SimdConstant::SplatX2(clearSignMask), lhs, output); 2242 vandpdSimd128(SimdConstant::SplatX2(keepSignMask), rhs, scratch); 2243 } 2244 } else { 2245 if (rhs == output) { 2246 MOZ_ASSERT(lhs != rhs); 2247 loadConstantDouble(keepSignMask, scratch); 2248 vandpd(scratch, rhs, output); 2249 2250 loadConstantDouble(clearSignMask, scratch); 2251 vandpd(lhs, scratch, scratch); 2252 } else { 2253 loadConstantDouble(clearSignMask, scratch); 2254 vandpd(scratch, lhs, output); 2255 2256 loadConstantDouble(keepSignMask, scratch); 2257 vandpd(rhs, scratch, scratch); 2258 } 2259 } 2260 2261 vorpd(scratch, output, output); 2262 } 2263 2264 void MacroAssembler::copySignFloat32(FloatRegister lhs, FloatRegister rhs, 2265 FloatRegister output) { 2266 ScratchFloat32Scope scratch(*this); 2267 2268 float keepSignMask = mozilla::BitwiseCast<float>(INT32_MIN); 2269 float clearSignMask = 
mozilla::BitwiseCast<float>(INT32_MAX); 2270 2271 if (HasAVX()) { 2272 if (rhs == output) { 2273 MOZ_ASSERT(lhs != rhs); 2274 vandpsSimd128(SimdConstant::SplatX4(keepSignMask), rhs, output); 2275 vandpsSimd128(SimdConstant::SplatX4(clearSignMask), lhs, scratch); 2276 } else { 2277 vandpsSimd128(SimdConstant::SplatX4(clearSignMask), lhs, output); 2278 vandpsSimd128(SimdConstant::SplatX4(keepSignMask), rhs, scratch); 2279 } 2280 } else { 2281 if (rhs == output) { 2282 MOZ_ASSERT(lhs != rhs); 2283 loadConstantFloat32(keepSignMask, scratch); 2284 vandps(scratch, output, output); 2285 2286 loadConstantFloat32(clearSignMask, scratch); 2287 vandps(lhs, scratch, scratch); 2288 } else { 2289 loadConstantFloat32(clearSignMask, scratch); 2290 vandps(scratch, lhs, output); 2291 2292 loadConstantFloat32(keepSignMask, scratch); 2293 vandps(rhs, scratch, scratch); 2294 } 2295 } 2296 2297 vorps(scratch, output, output); 2298 } 2299 2300 void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, int shift, 2301 Register pointer) { 2302 if (IsShiftInScaleRange(shift)) { 2303 computeEffectiveAddress( 2304 BaseIndex(pointer, indexTemp32, ShiftToScale(shift)), pointer); 2305 return; 2306 } 2307 lshift32(Imm32(shift), indexTemp32); 2308 addPtr(indexTemp32, pointer); 2309 } 2310 2311 CodeOffset MacroAssembler::wasmMarkedSlowCall(const wasm::CallSiteDesc& desc, 2312 const Register reg) { 2313 CodeOffset offset = call(desc, reg); 2314 wasmMarkCallAsSlow(); 2315 return offset; 2316 } 2317 2318 //}}} check_macroassembler_style