MacroAssembler-x86-shared-inl.h (121962B)
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef jit_x86_shared_MacroAssembler_x86_shared_inl_h
#define jit_x86_shared_MacroAssembler_x86_shared_inl_h

#include "jit/x86-shared/MacroAssembler-x86-shared.h"

#include "mozilla/Casting.h"
#include "mozilla/MathAlgorithms.h"

namespace js {
namespace jit {

//{{{ check_macroassembler_style
// ===============================================================
// Move instructions

// Move the low 16 bits of |src| (a float16 payload) into |dest| as a
// zero-extended 16-bit value.
void MacroAssembler::moveFloat16ToGPR(FloatRegister src, Register dest) {
  vmovd(src, dest);

  // Ensure the hi-word is zeroed.
  movzwl(dest, dest);
}

// Move the low 16 bits of |src| into the float register |dest|.
// NOTE: this clobbers |src| (zero-extends it in place).
void MacroAssembler::moveGPRToFloat16(Register src, FloatRegister dest) {
  // Ensure the hi-word is zeroed.
  movzwl(src, src);

  vmovd(src, dest);
}

void MacroAssembler::moveFloat32ToGPR(FloatRegister src, Register dest) {
  vmovd(src, dest);
}

void MacroAssembler::moveGPRToFloat32(Register src, FloatRegister dest) {
  vmovd(src, dest);
}

// Move the low 32 bits of a double register into a GPR (no conversion).
void MacroAssembler::moveLowDoubleToGPR(FloatRegister src, Register dest) {
  vmovd(src, dest);
}

void MacroAssembler::move8ZeroExtend(Register src, Register dest) {
  movzbl(src, dest);
}

void MacroAssembler::move8SignExtend(Register src, Register dest) {
  movsbl(src, dest);
}

void MacroAssembler::move16SignExtend(Register src, Register dest) {
  movswl(src, dest);
}

// On x86 the return address is on the stack at the current stack pointer.
void MacroAssembler::loadAbiReturnAddress(Register dest) {
  loadPtr(Address(getStackPointer(), 0), dest);
}

// ===============================================================
// Logical instructions

void MacroAssembler::not32(Register reg) { notl(reg); }

void MacroAssembler::and32(Register src, Register dest) { andl(src, dest); }

void MacroAssembler::and32(Imm32 imm, Register dest) { andl(imm, dest); }

void MacroAssembler::and32(Imm32 imm, Register src, Register dest) {
  if (src != dest) {
    movl(src, dest);
  }
  andl(imm, dest);
}

void MacroAssembler::and32(Imm32 imm, const Address& dest) {
  andl(imm, Operand(dest));
}

void MacroAssembler::and32(const Address& src, Register dest) {
  andl(Operand(src), dest);
}

void MacroAssembler::or32(Register src, Register dest) { orl(src, dest); }

void MacroAssembler::or32(Imm32 imm, Register dest) { orl(imm, dest); }

void MacroAssembler::or32(Imm32 imm, Register src, Register dest) {
  if (src != dest) {
    movl(src, dest);
  }
  orl(imm, dest);
}

void MacroAssembler::or32(Imm32 imm, const Address& dest) {
  orl(imm, Operand(dest));
}

void MacroAssembler::xor32(Register src, Register dest) { xorl(src, dest); }

void MacroAssembler::xor32(Imm32 imm, Register dest) { xorl(imm, dest); }

void MacroAssembler::xor32(Imm32 imm, Register src, Register dest) {
  if (src != dest) {
    movl(src, dest);
  }
  xorl(imm, dest);
}

void MacroAssembler::xor32(Imm32 imm, const Address& dest) {
  xorl(imm, Operand(dest));
}

void MacroAssembler::xor32(const Address& src, Register dest) {
  xorl(Operand(src), dest);
}

// Count leading zeroes; clz32(0) produces 32.  Uses LZCNT when available,
// otherwise falls back to BSR plus a fixup.
void MacroAssembler::clz32(Register src, Register dest, bool knownNotZero) {
  if (AssemblerX86Shared::HasLZCNT()) {
    lzcntl(src, dest);
    return;
  }

  // BSR yields the bit index of the highest set bit; XOR with 0x1F below
  // converts that index into a leading-zero count.
  bsrl(src, dest);
  if (!knownNotZero) {
    // If the source is zero then bsrl leaves garbage in the destination.
    // 0x3F ^ 0x1F == 32, the required result for a zero input.
    Label nonzero;
    j(Assembler::NonZero, &nonzero);
    movl(Imm32(0x3F), dest);
    bind(&nonzero);
  }
  xorl(Imm32(0x1F), dest);
}

// Count trailing zeroes; ctz32(0) produces 32.  Uses TZCNT when available,
// otherwise falls back to BSF plus a fixup.
void MacroAssembler::ctz32(Register src, Register dest, bool knownNotZero) {
  if (AssemblerX86Shared::HasBMI1()) {
    tzcntl(src, dest);
    return;
  }

  bsfl(src, dest);
  if (!knownNotZero) {
    // If the source is zero then bsfl leaves garbage in the destination.
    Label nonzero;
    j(Assembler::NonZero, &nonzero);
    movl(Imm32(32), dest);
    bind(&nonzero);
  }
}

// Population count.  Uses POPCNT when available; otherwise emits the classic
// parallel bit-summing sequence (|tmp| is required in that case).
void MacroAssembler::popcnt32(Register input, Register output, Register tmp) {
  if (AssemblerX86Shared::HasPOPCNT()) {
    popcntl(input, output);
    return;
  }

  MOZ_ASSERT(tmp != InvalidReg);

  // Equivalent to mozilla::CountPopulation32()

  movl(input, tmp);
  if (input != output) {
    movl(input, output);
  }
  // tmp = input - ((input >> 1) & 0x55555555): 2-bit partial counts.
  shrl(Imm32(1), output);
  andl(Imm32(0x55555555), output);
  subl(output, tmp);
  // Sum adjacent 2-bit counts into 4-bit counts.
  movl(tmp, output);
  andl(Imm32(0x33333333), output);
  shrl(Imm32(2), tmp);
  andl(Imm32(0x33333333), tmp);
  addl(output, tmp);
  // Sum adjacent 4-bit counts into 8-bit counts, then combine all bytes
  // via the 0x01010101 multiply and take the top byte.
  movl(tmp, output);
  shrl(Imm32(4), output);
  addl(tmp, output);
  andl(Imm32(0xF0F0F0F), output);
  imull(Imm32(0x1010101), output, output);
  shrl(Imm32(24), output);
}

// ===============================================================
// Swap instructions

// Byte-swap the low 16 bits of |reg| and sign-extend the result to 32 bits.
void MacroAssembler::byteSwap16SignExtend(Register reg) {
  rolw(Imm32(8), reg);
  movswl(reg, reg);
}

// Byte-swap the low 16 bits of |reg| and zero-extend the result to 32 bits.
void MacroAssembler::byteSwap16ZeroExtend(Register reg) {
  rolw(Imm32(8), reg);
  movzwl(reg, reg);
}

void MacroAssembler::byteSwap32(Register reg) { bswapl(reg); }

// ===============================================================
// Arithmetic instructions

void MacroAssembler::add32(const Address& src, Register dest) {
  addl(Operand(src), dest);
}

void MacroAssembler::add32(Register src, Register dest) { addl(src, dest); }

void MacroAssembler::add32(Imm32 imm, Register dest) { addl(imm, dest); }

// Three-operand add via LEA: computes dest = src + imm without clobbering
// src and without affecting the flags.
void MacroAssembler::add32(Imm32 imm, Register src, Register dest) {
  leal(Operand(src, imm.value), dest);
}

void MacroAssembler::add32(Imm32 imm, const Address& dest) {
  addl(imm, Operand(dest));
}

void MacroAssembler::add32(Imm32 imm, const AbsoluteAddress& dest) {
  addl(imm, Operand(dest));
}

void MacroAssembler::addFloat32(FloatRegister src, FloatRegister dest) {
  vaddss(src, dest, dest);
}

void MacroAssembler::addDouble(FloatRegister src, FloatRegister dest) {
  vaddsd(src, dest, dest);
}

void MacroAssembler::sub32(Register src, Register dest) { subl(src, dest); }

void MacroAssembler::sub32(Imm32 imm, Register dest) { subl(imm, dest); }

void MacroAssembler::sub32(const Address& src, Register dest) {
  subl(Operand(src), dest);
}

void MacroAssembler::subDouble(FloatRegister src, FloatRegister dest) {
  vsubsd(src, dest, dest);
}

void MacroAssembler::subFloat32(FloatRegister src, FloatRegister dest) {
  vsubss(src, dest, dest);
}

void MacroAssembler::mul32(Register rhs, Register srcDest) {
  imull(rhs, srcDest);
}

void MacroAssembler::mul32(Imm32 imm, Register srcDest) { imull(imm, srcDest); }

void MacroAssembler::mulFloat32(FloatRegister src, FloatRegister dest) {
  vmulss(src, dest, dest);
}

void MacroAssembler::mulDouble(FloatRegister src, FloatRegister dest) {
  vmulsd(src, dest, dest);
}

// 32-bit division.  x86 DIV/IDIV divide edx:eax, so the register allocation
// is fixed: lhs and the quotient live in eax, and edx is clobbered.
void MacroAssembler::quotient32(Register lhs, Register rhs, Register dest,
                                Register tempEdx, bool isUnsigned) {
  MOZ_ASSERT(lhs == eax && dest == eax && tempEdx == edx);

  // Sign extend eax into edx to make (edx:eax): idiv/udiv are 64-bit.
  if (isUnsigned) {
    mov(ImmWord(0), edx);
    udiv(rhs);
  } else {
    cdq();
    idiv(rhs);
  }
}

// 32-bit remainder.  Same register contract as quotient32; DIV/IDIV leave
// the remainder in edx, which is moved into eax (== dest) afterwards.
void MacroAssembler::remainder32(Register lhs, Register rhs, Register dest,
                                 Register tempEdx, bool isUnsigned) {
  MOZ_ASSERT(lhs == eax && dest == eax && tempEdx == edx);

  // Sign extend eax into edx to make (edx:eax): idiv/udiv are 64-bit.
  if (isUnsigned) {
    mov(ImmWord(0), edx);
    udiv(rhs);
  } else {
    cdq();
    idiv(rhs);
  }
  mov(edx, eax);
}

void MacroAssembler::divFloat32(FloatRegister src, FloatRegister dest) {
  vdivss(src, dest, dest);
}

void MacroAssembler::divDouble(FloatRegister src, FloatRegister dest) {
  vdivsd(src, dest, dest);
}

void MacroAssembler::neg32(Register reg) { negl(reg); }

void MacroAssembler::negateFloat(FloatRegister reg) {
  // XOR the float in a float register with -0.0.
  vxorpsSimd128(SimdConstant::SplatX4(-0.0f), reg, reg);
}

void MacroAssembler::negateDouble(FloatRegister reg) {
  // XOR the float in a float register with -0.0.
  vxorpdSimd128(SimdConstant::SplatX2(-0.0), reg, reg);
}

// Integer absolute value.  Note INT32_MIN negates to itself (overflow).
void MacroAssembler::abs32(Register src, Register dest) {
  if (src != dest) {
    move32(src, dest);
  }
  Label positive;
  branchTest32(Assembler::NotSigned, dest, dest, &positive);
  neg32(dest);
  bind(&positive);
}

// Float absolute value: mask off the sign bit.
void MacroAssembler::absFloat32(FloatRegister src, FloatRegister dest) {
  float clearSignMask = mozilla::BitwiseCast<float>(INT32_MAX);
  vandpsSimd128(SimdConstant::SplatX4(clearSignMask), src, dest);
}

void MacroAssembler::absDouble(FloatRegister src, FloatRegister dest) {
  double clearSignMask = mozilla::BitwiseCast<double>(INT64_MAX);
  vandpdSimd128(SimdConstant::SplatX2(clearSignMask), src, dest);
}

void MacroAssembler::sqrtFloat32(FloatRegister src, FloatRegister dest) {
  vsqrtss(src, dest, dest);
}

void MacroAssembler::sqrtDouble(FloatRegister src, FloatRegister dest) {
  vsqrtsd(src, dest, dest);
}

// min/max: |handleNaN| requests JS semantics for NaN and +-0 (handled by
// the minMax* helpers defined elsewhere).
void MacroAssembler::minFloat32(FloatRegister other, FloatRegister srcDest,
                                bool handleNaN) {
  minMaxFloat32(srcDest, other, handleNaN, false);
}

void MacroAssembler::minDouble(FloatRegister other, FloatRegister srcDest,
                               bool handleNaN) {
  minMaxDouble(srcDest, other, handleNaN, false);
}

void MacroAssembler::maxFloat32(FloatRegister other, FloatRegister srcDest,
                                bool handleNaN) {
  minMaxFloat32(srcDest, other, handleNaN, true);
}

void MacroAssembler::maxDouble(FloatRegister other, FloatRegister srcDest,
                               bool handleNaN) {
  minMaxDouble(srcDest, other, handleNaN, true);
}

// ===============================================================
// Rotation instructions
void MacroAssembler::rotateLeft(Imm32 count, Register input, Register dest) {
  MOZ_ASSERT(input == dest, "defineReuseInput");
  count.value &= 0x1f;
  if (count.value) {
    roll(count, input);
  }
}

void MacroAssembler::rotateLeft(Register count, Register input, Register dest) {
  MOZ_ASSERT(input == dest, "defineReuseInput");
  // Variable-count rotates take their count in cl.
  MOZ_ASSERT(count == ecx, "defineFixed(ecx)");
  roll_cl(input);
}

void MacroAssembler::rotateRight(Imm32 count, Register input, Register dest) {
  MOZ_ASSERT(input == dest, "defineReuseInput");
  count.value &= 0x1f;
  if (count.value) {
    rorl(count, input);
  }
}

void MacroAssembler::rotateRight(Register count, Register input,
                                 Register dest) {
  MOZ_ASSERT(input == dest, "defineReuseInput");
  MOZ_ASSERT(count == ecx, "defineFixed(ecx)");
  rorl_cl(input);
}

// ===============================================================
// Shift instructions

// Variable-count shifts: BMI2's SHLX/SHRX/SARX accept any count register;
// the legacy encodings require the count in cl.
void MacroAssembler::lshift32(Register shift, Register srcDest) {
  if (HasBMI2()) {
    shlxl(srcDest, shift, srcDest);
    return;
  }
  MOZ_ASSERT(shift == ecx);
  shll_cl(srcDest);
}

// Like lshift32, but |shift| need not be ecx.  Without BMI2 the count is
// swapped into ecx around the shift; the ternary picks the register that
// actually holds the value to shift after the xchg.
void MacroAssembler::flexibleLshift32(Register shift, Register srcDest) {
  if (HasBMI2()) {
    shlxl(srcDest, shift, srcDest);
    return;
  }
  if (shift == ecx) {
    shll_cl(srcDest);
  } else {
    // Shift amount must be in ecx.
    xchg(shift, ecx);
    shll_cl(shift == srcDest ? ecx : srcDest == ecx ? shift : srcDest);
    xchg(shift, ecx);
  }
}

void MacroAssembler::rshift32(Register shift, Register srcDest) {
  if (HasBMI2()) {
    shrxl(srcDest, shift, srcDest);
    return;
  }
  MOZ_ASSERT(shift == ecx);
  shrl_cl(srcDest);
}

void MacroAssembler::flexibleRshift32(Register shift, Register srcDest) {
  if (HasBMI2()) {
    shrxl(srcDest, shift, srcDest);
    return;
  }
  if (shift == ecx) {
    shrl_cl(srcDest);
  } else {
    // Shift amount must be in ecx.
    xchg(shift, ecx);
    shrl_cl(shift == srcDest ? ecx : srcDest == ecx ? shift : srcDest);
    xchg(shift, ecx);
  }
}

void MacroAssembler::rshift32Arithmetic(Register shift, Register srcDest) {
  if (HasBMI2()) {
    sarxl(srcDest, shift, srcDest);
    return;
  }
  MOZ_ASSERT(shift == ecx);
  sarl_cl(srcDest);
}

void MacroAssembler::flexibleRshift32Arithmetic(Register shift,
                                                Register srcDest) {
  if (HasBMI2()) {
    sarxl(srcDest, shift, srcDest);
    return;
  }
  if (shift == ecx) {
    sarl_cl(srcDest);
  } else {
    // Shift amount must be in ecx.
    xchg(shift, ecx);
    sarl_cl(shift == srcDest ? ecx : srcDest == ecx ? shift : srcDest);
    xchg(shift, ecx);
  }
}

void MacroAssembler::lshift32(Imm32 shift, Register srcDest) {
  shll(shift, srcDest);
}

void MacroAssembler::lshift32(Imm32 shift, Register src, Register dest) {
  if (src != dest) {
    movl(src, dest);
  }
  shll(shift, dest);
}

void MacroAssembler::rshift32(Imm32 shift, Register srcDest) {
  shrl(shift, srcDest);
}

void MacroAssembler::rshift32(Imm32 shift, Register src, Register dest) {
  if (src != dest) {
    movl(src, dest);
  }
  shrl(shift, dest);
}

void MacroAssembler::rshift32Arithmetic(Imm32 shift, Register srcDest) {
  sarl(shift, srcDest);
}

void MacroAssembler::rshift32Arithmetic(Imm32 shift, Register src,
                                        Register dest) {
  if (src != dest) {
    movl(src, dest);
  }
  sarl(shift, dest);
}

// ===============================================================
// Condition functions

// cmpNSet: materialize (lhs cond rhs) as 0/1 in |dest|.
// maybeEmitSetZeroByteRegister (defined elsewhere) presumably pre-zeroes
// |dest| when that is safe before the compare; emitSet is told whether it
// did so via |destIsZero|.
void MacroAssembler::cmp8Set(Condition cond, Address lhs, Imm32 rhs,
                             Register dest) {
  bool destIsZero = maybeEmitSetZeroByteRegister(lhs, rhs, dest);
  cmp8(lhs, rhs);
  emitSet(cond, dest, destIsZero);
}

void MacroAssembler::cmp16Set(Condition cond, Address lhs, Imm32 rhs,
                              Register dest) {
  bool destIsZero = maybeEmitSetZeroByteRegister(lhs, rhs, dest);
  cmp16(lhs, rhs);
  emitSet(cond, dest, destIsZero);
}

template <typename T1, typename T2>
void MacroAssembler::cmp32Set(Condition cond, T1 lhs, T2 rhs, Register dest) {
  bool destIsZero = maybeEmitSetZeroByteRegister(lhs, rhs, dest);
  cmp32(lhs, rhs);
  emitSet(cond, dest, destIsZero);
}

// ===============================================================
// Branch instructions

void MacroAssembler::branch8(Condition cond, const Address& lhs, Imm32 rhs,
                             Label* label) {
  cmp8(lhs, rhs);
  j(cond, label);
}

void MacroAssembler::branch8(Condition cond, const BaseIndex& lhs, Register rhs,
                             Label* label) {
  cmp8(Operand(lhs), rhs);
  j(cond, label);
}

void MacroAssembler::branch16(Condition cond, const Address& lhs, Imm32 rhs,
                              Label* label) {
  cmp16(lhs, rhs);
  j(cond, label);
}

void MacroAssembler::branch32(Condition cond, Register lhs, Register rhs,
                              Label* label) {
  cmp32(lhs, rhs);
  j(cond, label);
}

void MacroAssembler::branch32(Condition cond, Register lhs, Imm32 rhs,
                              Label* label) {
  cmp32(lhs, rhs);
  j(cond, label);
}

void MacroAssembler::branch32(Condition cond, const Address& lhs, Register rhs,
                              Label* label) {
  cmp32(Operand(lhs), rhs);
  j(cond, label);
}

void MacroAssembler::branch32(Condition cond, const Address& lhs, Imm32 rhs,
                              Label* label) {
  cmp32(Operand(lhs), rhs);
  j(cond, label);
}

void MacroAssembler::branch32(Condition cond, const BaseIndex& lhs,
                              Register rhs, Label* label) {
  cmp32(Operand(lhs), rhs);
  j(cond, label);
}

void MacroAssembler::branch32(Condition cond, const BaseIndex& lhs, Imm32 rhs,
                              Label* label) {
  cmp32(Operand(lhs), rhs);
  j(cond, label);
}

void MacroAssembler::branch32(Condition cond, const Operand& lhs, Register rhs,
                              Label* label) {
  cmp32(lhs, rhs);
  j(cond, label);
}

void MacroAssembler::branch32(Condition cond, const Operand& lhs, Imm32 rhs,
                              Label* label) {
  cmp32(lhs, rhs);
  j(cond, label);
}

void MacroAssembler::branchPtr(Condition cond, Register lhs, Register rhs,
                               Label* label) {
  cmpPtr(lhs, rhs);
  j(cond, label);
}

void MacroAssembler::branchPtr(Condition cond, Register lhs, Imm32 rhs,
                               Label* label) {
  branchPtrImpl(cond, lhs, rhs, label);
}

void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmPtr rhs,
                               Label* label) {
  branchPtrImpl(cond, lhs, rhs, label);
}

void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmGCPtr rhs,
                               Label* label) {
  branchPtrImpl(cond, lhs, rhs, label);
}

void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmWord rhs,
                               Label* label) {
  branchPtrImpl(cond, lhs, rhs, label);
}

void MacroAssembler::branchPtr(Condition cond, const Address& lhs, Register rhs,
                               Label* label) {
  branchPtrImpl(cond, lhs, rhs, label);
}

void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmPtr rhs,
                               Label* label) {
  branchPtrImpl(cond, lhs, rhs, label);
}

void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmGCPtr rhs,
                               Label* label) {
  branchPtrImpl(cond, lhs, rhs, label);
}

void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmWord rhs,
                               Label* label) {
  branchPtrImpl(cond, lhs, rhs, label);
}

void MacroAssembler::branchPtr(Condition cond, const BaseIndex& lhs,
                               ImmWord rhs, Label* label) {
  branchPtrImpl(cond, lhs, rhs, label);
}

void MacroAssembler::branchPtr(Condition cond, const BaseIndex& lhs,
                               Register rhs, Label* label) {
  branchPtrImpl(cond, lhs, rhs, label);
}

// Shared tail for the branchPtr overloads above.
template <typename T, typename S>
void MacroAssembler::branchPtrImpl(Condition cond, const T& lhs, const S& rhs,
                                   Label* label) {
  cmpPtr(Operand(lhs), rhs);
  j(cond, label);
}

// Floating-point branch.  An unordered compare (NaN operand) sets the
// parity flag, so DoubleEqual must explicitly skip the branch when PF is
// set, and DoubleNotEqualOrUnordered must also branch on PF.
void MacroAssembler::branchFloat(DoubleCondition cond, FloatRegister lhs,
                                 FloatRegister rhs, Label* label) {
  compareFloat(cond, lhs, rhs);

  if (cond == DoubleEqual) {
    Label unordered;
    j(Parity, &unordered);
    j(Equal, label);
    bind(&unordered);
    return;
  }

  if (cond == DoubleNotEqualOrUnordered) {
    j(NotEqual, label);
    j(Parity, label);
    return;
  }

  // All remaining conditions map directly to a single jcc.
  MOZ_ASSERT(!(cond & DoubleConditionBitSpecial));
  j(ConditionFromDoubleCondition(cond), label);
}

// Same NaN handling as branchFloat, for doubles.
void MacroAssembler::branchDouble(DoubleCondition cond, FloatRegister lhs,
                                  FloatRegister rhs, Label* label) {
  compareDouble(cond, lhs, rhs);

  if (cond == DoubleEqual) {
    Label unordered;
    j(Parity, &unordered);
    j(Equal, label);
    bind(&unordered);
    return;
  }
  if (cond == DoubleNotEqualOrUnordered) {
    j(NotEqual, label);
    j(Parity, label);
    return;
  }

  MOZ_ASSERT(!(cond & DoubleConditionBitSpecial));
  j(ConditionFromDoubleCondition(cond), label);
}

// Fused op-then-branch helpers: perform the operation and branch on the
// resulting flags (e.g. Overflow after an add).
template <typename T>
void MacroAssembler::branchAdd32(Condition cond, T src, Register dest,
                                 Label* label) {
  addl(src, dest);
  j(cond, label);
}

template <typename T>
void MacroAssembler::branchSub32(Condition cond, T src, Register dest,
                                 Label* label) {
  subl(src, dest);
  j(cond, label);
}

template <typename T>
void MacroAssembler::branchMul32(Condition cond, T src, Register dest,
                                 Label* label) {
  mul32(src, dest);
  j(cond, label);
}

template <typename T>
void MacroAssembler::branchRshift32(Condition cond, T src, Register dest,
                                    Label* label) {
  MOZ_ASSERT(cond == Zero || cond == NonZero);
  rshift32(src, dest);
  j(cond, label);
}

void MacroAssembler::branchNeg32(Condition cond, Register reg, Label* label) {
  MOZ_ASSERT(cond == Overflow);
  neg32(reg);
  j(cond, label);
}

// Pointer-width fused op-then-branch helpers, mirroring the 32-bit ones.
template <typename T>
void MacroAssembler::branchAddPtr(Condition cond, T src, Register dest,
                                  Label* label) {
  addPtr(src, dest);
  j(cond, label);
}

template <typename T>
void MacroAssembler::branchSubPtr(Condition cond, T src, Register dest,
                                  Label* label) {
  subPtr(src, dest);
  j(cond, label);
}

void MacroAssembler::branchMulPtr(Condition cond, Register src, Register dest,
                                  Label* label) {
  mulPtr(src, dest);
  j(cond, label);
}

void MacroAssembler::branchNegPtr(Condition cond, Register reg, Label* label) {
  MOZ_ASSERT(cond == Overflow);
  negPtr(reg);
  j(cond, label);
}

// Decrement |lhs| by |rhs| and branch on the resulting flags.
void MacroAssembler::decBranchPtr(Condition cond, Register lhs, Imm32 rhs,
                                  Label* label) {
  subPtr(rhs, lhs);
  j(cond, label);
}

// branchTest*: TEST sets only ZF/SF (and PF), so only Zero/NonZero/
// Signed/NotSigned conditions are meaningful here.
void MacroAssembler::branchTest32(Condition cond, Register lhs, Register rhs,
                                  Label* label) {
  MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
             cond == NotSigned);
  test32(lhs, rhs);
  j(cond, label);
}

void MacroAssembler::branchTest32(Condition cond, Register lhs, Imm32 rhs,
                                  Label* label) {
  MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
             cond == NotSigned);
  test32(lhs, rhs);
  j(cond, label);
}

void MacroAssembler::branchTest32(Condition cond, const Address& lhs, Imm32 rhs,
                                  Label* label) {
  MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
             cond == NotSigned);
  test32(Operand(lhs), rhs);
  j(cond, label);
}

void MacroAssembler::branchTestPtr(Condition cond, Register lhs, Register rhs,
                                   Label* label) {
  testPtr(lhs, rhs);
  j(cond, label);
}

void MacroAssembler::branchTestPtr(Condition cond, Register lhs, Imm32 rhs,
                                   Label* label) {
  testPtr(lhs, rhs);
  j(cond, label);
}

void MacroAssembler::branchTestPtr(Condition cond, const Address& lhs,
                                   Imm32 rhs, Label* label) {
  testPtr(Operand(lhs), rhs);
  j(cond, label);
}

// The branchTest<Type> families below all follow the same pattern: the
// overloads forward to a templated impl which asks the corresponding
// test<Type> helper (defined per-architecture elsewhere) to emit the tag
// compare and translate |cond| into the machine condition to branch on.

void MacroAssembler::branchTestUndefined(Condition cond, Register tag,
                                         Label* label) {
  branchTestUndefinedImpl(cond, tag, label);
}

void MacroAssembler::branchTestUndefined(Condition cond, const Address& address,
                                         Label* label) {
  branchTestUndefinedImpl(cond, address, label);
}

void MacroAssembler::branchTestUndefined(Condition cond,
                                         const BaseIndex& address,
                                         Label* label) {
  branchTestUndefinedImpl(cond, address, label);
}

void MacroAssembler::branchTestUndefined(Condition cond,
                                         const ValueOperand& value,
                                         Label* label) {
  branchTestUndefinedImpl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestUndefinedImpl(Condition cond, const T& t,
                                             Label* label) {
  cond = testUndefined(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestInt32(Condition cond, Register tag,
                                     Label* label) {
  branchTestInt32Impl(cond, tag, label);
}

void MacroAssembler::branchTestInt32(Condition cond, const Address& address,
                                     Label* label) {
  branchTestInt32Impl(cond, address, label);
}

void MacroAssembler::branchTestInt32(Condition cond, const BaseIndex& address,
                                     Label* label) {
  branchTestInt32Impl(cond, address, label);
}

void MacroAssembler::branchTestInt32(Condition cond, const ValueOperand& value,
                                     Label* label) {
  branchTestInt32Impl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestInt32Impl(Condition cond, const T& t,
                                         Label* label) {
  cond = testInt32(cond, t);
  j(cond, label);
}

// Branch on the truthiness of an int32-tagged value (zero is falsy).
void MacroAssembler::branchTestInt32Truthy(bool truthy,
                                           const ValueOperand& value,
                                           Label* label) {
  Condition cond = testInt32Truthy(truthy, value);
  j(cond, label);
}

void MacroAssembler::branchTestDouble(Condition cond, Register tag,
                                      Label* label) {
  branchTestDoubleImpl(cond, tag, label);
}

void MacroAssembler::branchTestDouble(Condition cond, const Address& address,
                                      Label* label) {
  branchTestDoubleImpl(cond, address, label);
}

void MacroAssembler::branchTestDouble(Condition cond, const BaseIndex& address,
                                      Label* label) {
  branchTestDoubleImpl(cond, address, label);
}

void MacroAssembler::branchTestDouble(Condition cond, const ValueOperand& value,
                                      Label* label) {
  branchTestDoubleImpl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestDoubleImpl(Condition cond, const T& t,
                                          Label* label) {
  cond = testDouble(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestDoubleTruthy(bool truthy, FloatRegister reg,
                                            Label* label) {
  Condition cond = testDoubleTruthy(truthy, reg);
  j(cond, label);
}

void MacroAssembler::branchTestNumber(Condition cond, Register tag,
                                      Label* label) {
  branchTestNumberImpl(cond, tag, label);
}

void MacroAssembler::branchTestNumber(Condition cond, const ValueOperand& value,
                                      Label* label) {
  branchTestNumberImpl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestNumberImpl(Condition cond, const T& t,
                                          Label* label) {
  cond = testNumber(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestBoolean(Condition cond, Register tag,
                                       Label* label) {
  branchTestBooleanImpl(cond, tag, label);
}

void MacroAssembler::branchTestBoolean(Condition cond, const Address& address,
                                       Label* label) {
  branchTestBooleanImpl(cond, address, label);
}

void MacroAssembler::branchTestBoolean(Condition cond, const BaseIndex& address,
                                       Label* label) {
  branchTestBooleanImpl(cond, address, label);
}

void MacroAssembler::branchTestBoolean(Condition cond,
                                       const ValueOperand& value,
                                       Label* label) {
  branchTestBooleanImpl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestBooleanImpl(Condition cond, const T& t,
                                           Label* label) {
  cond = testBoolean(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestString(Condition cond, Register tag,
                                      Label* label) {
  branchTestStringImpl(cond, tag, label);
}

void MacroAssembler::branchTestString(Condition cond, const Address& address,
                                      Label* label) {
  branchTestStringImpl(cond, address, label);
}

void MacroAssembler::branchTestString(Condition cond, const BaseIndex& address,
                                      Label* label) {
  branchTestStringImpl(cond, address, label);
}

void MacroAssembler::branchTestString(Condition cond, const ValueOperand& value,
                                      Label* label) {
  branchTestStringImpl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestStringImpl(Condition cond, const T& t,
                                          Label* label) {
  cond = testString(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestStringTruthy(bool truthy,
                                            const ValueOperand& value,
                                            Label* label) {
  Condition cond = testStringTruthy(truthy, value);
  j(cond, label);
}

void MacroAssembler::branchTestSymbol(Condition cond, Register tag,
                                      Label* label) {
  branchTestSymbolImpl(cond, tag, label);
}

void MacroAssembler::branchTestSymbol(Condition cond, const Address& address,
                                      Label* label) {
  branchTestSymbolImpl(cond, address, label);
}

void MacroAssembler::branchTestSymbol(Condition cond, const BaseIndex& address,
                                      Label* label) {
  branchTestSymbolImpl(cond, address, label);
}

void MacroAssembler::branchTestSymbol(Condition cond, const ValueOperand& value,
                                      Label* label) {
  branchTestSymbolImpl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestSymbolImpl(Condition cond, const T& t,
                                          Label* label) {
  cond = testSymbol(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestBigInt(Condition cond, Register tag,
                                      Label* label) {
  branchTestBigIntImpl(cond, tag, label);
}

void MacroAssembler::branchTestBigInt(Condition cond, const Address& address,
                                      Label* label) {
  branchTestBigIntImpl(cond, address, label);
}

void MacroAssembler::branchTestBigInt(Condition cond, const BaseIndex& address,
                                      Label* label) {
  branchTestBigIntImpl(cond, address, label);
}

void MacroAssembler::branchTestBigInt(Condition cond, const ValueOperand& value,
                                      Label* label) {
  branchTestBigIntImpl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestBigIntImpl(Condition cond, const T& t,
                                          Label* label) {
  cond = testBigInt(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestBigIntTruthy(bool truthy,
                                            const ValueOperand& value,
                                            Label* label) {
  Condition cond = testBigIntTruthy(truthy, value);
  j(cond, label);
}

void MacroAssembler::branchTestNull(Condition cond, Register tag,
                                    Label* label) {
  branchTestNullImpl(cond, tag, label);
}

void MacroAssembler::branchTestNull(Condition cond, const Address& address,
                                    Label* label) {
  branchTestNullImpl(cond, address, label);
}

void MacroAssembler::branchTestNull(Condition cond, const BaseIndex& address,
                                    Label* label) {
  branchTestNullImpl(cond, address, label);
}

void MacroAssembler::branchTestNull(Condition cond, const ValueOperand& value,
                                    Label* label) {
  branchTestNullImpl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestNullImpl(Condition cond, const T& t,
                                        Label* label) {
  cond = testNull(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestObject(Condition cond, Register tag,
                                      Label* label) {
  branchTestObjectImpl(cond, tag, label);
}

void MacroAssembler::branchTestObject(Condition cond, const Address& address,
                                      Label* label) {
  branchTestObjectImpl(cond, address, label);
}

void MacroAssembler::branchTestObject(Condition cond, const BaseIndex& address,
                                      Label* label) {
  branchTestObjectImpl(cond, address, label);
}

void MacroAssembler::branchTestObject(Condition cond, const ValueOperand& value,
                                      Label* label) {
  branchTestObjectImpl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestObjectImpl(Condition cond, const T& t,
                                          Label* label) {
  cond = testObject(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestGCThing(Condition cond, const Address& address,
                                       Label* label) {
  branchTestGCThingImpl(cond, address, label);
}

void MacroAssembler::branchTestGCThing(Condition cond, const BaseIndex& address,
                                       Label* label) {
  branchTestGCThingImpl(cond, address, label);
}

void MacroAssembler::branchTestGCThing(Condition cond,
                                       const ValueOperand& value,
                                       Label* label) {
  branchTestGCThingImpl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestGCThingImpl(Condition cond, const T& t,
                                           Label* label) {
  cond = testGCThing(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestPrimitive(Condition cond, Register tag,
                                         Label* label) {
  branchTestPrimitiveImpl(cond, tag, label);
}

void MacroAssembler::branchTestPrimitive(Condition cond,
                                         const ValueOperand& value,
                                         Label* label) {
  branchTestPrimitiveImpl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestPrimitiveImpl(Condition cond, const T& t,
                                             Label* label) {
  cond = testPrimitive(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestMagic(Condition cond, Register tag, 1138 Label* label) { 1139 branchTestMagicImpl(cond, tag, label); 1140 } 1141 1142 void MacroAssembler::branchTestMagic(Condition cond, const Address& address, 1143 Label* label) { 1144 branchTestMagicImpl(cond, address, label); 1145 } 1146 1147 void MacroAssembler::branchTestMagic(Condition cond, const BaseIndex& address, 1148 Label* label) { 1149 branchTestMagicImpl(cond, address, label); 1150 } 1151 1152 void MacroAssembler::branchTestMagic(Condition cond, const ValueOperand& value, 1153 Label* label) { 1154 branchTestMagicImpl(cond, value, label); 1155 } 1156 1157 template <typename T> 1158 void MacroAssembler::branchTestMagicImpl(Condition cond, const T& t, 1159 Label* label) { 1160 cond = testMagic(cond, t); 1161 j(cond, label); 1162 } 1163 1164 template <typename T> 1165 void MacroAssembler::testNumberSet(Condition cond, const T& src, 1166 Register dest) { 1167 bool destIsZero = maybeEmitSetZeroByteRegister(src, dest); 1168 cond = testNumber(cond, src); 1169 emitSet(cond, dest, destIsZero); 1170 } 1171 1172 template <typename T> 1173 void MacroAssembler::testBooleanSet(Condition cond, const T& src, 1174 Register dest) { 1175 bool destIsZero = maybeEmitSetZeroByteRegister(src, dest); 1176 cond = testBoolean(cond, src); 1177 emitSet(cond, dest, destIsZero); 1178 } 1179 1180 template <typename T> 1181 void MacroAssembler::testStringSet(Condition cond, const T& src, 1182 Register dest) { 1183 bool destIsZero = maybeEmitSetZeroByteRegister(src, dest); 1184 cond = testString(cond, src); 1185 emitSet(cond, dest, destIsZero); 1186 } 1187 1188 template <typename T> 1189 void MacroAssembler::testSymbolSet(Condition cond, const T& src, 1190 Register dest) { 1191 bool destIsZero = maybeEmitSetZeroByteRegister(src, dest); 1192 cond = testSymbol(cond, src); 1193 emitSet(cond, dest, destIsZero); 1194 } 1195 1196 template <typename T> 1197 void MacroAssembler::testBigIntSet(Condition cond, const T& src, 1198 
Register dest) { 1199 bool destIsZero = maybeEmitSetZeroByteRegister(src, dest); 1200 cond = testBigInt(cond, src); 1201 emitSet(cond, dest, destIsZero); 1202 } 1203 1204 void MacroAssembler::cmp32Move32(Condition cond, Register lhs, Imm32 rhs, 1205 Register src, Register dest) { 1206 cmp32(lhs, rhs); 1207 cmovCCl(cond, src, dest); 1208 } 1209 1210 void MacroAssembler::cmp32Move32(Condition cond, Register lhs, Register rhs, 1211 Register src, Register dest) { 1212 cmp32(lhs, rhs); 1213 cmovCCl(cond, src, dest); 1214 } 1215 1216 void MacroAssembler::cmp32Move32(Condition cond, Register lhs, 1217 const Address& rhs, Register src, 1218 Register dest) { 1219 cmp32(lhs, Operand(rhs)); 1220 cmovCCl(cond, src, dest); 1221 } 1222 1223 void MacroAssembler::cmp32Load32(Condition cond, Register lhs, 1224 const Address& rhs, const Address& src, 1225 Register dest) { 1226 cmp32(lhs, Operand(rhs)); 1227 cmovCCl(cond, Operand(src), dest); 1228 } 1229 1230 void MacroAssembler::cmp32Load32(Condition cond, Register lhs, Register rhs, 1231 const Address& src, Register dest) { 1232 cmp32(lhs, rhs); 1233 cmovCCl(cond, Operand(src), dest); 1234 } 1235 1236 void MacroAssembler::cmp32Load32(Condition cond, Register lhs, Imm32 rhs, 1237 const Address& src, Register dest) { 1238 cmp32(lhs, rhs); 1239 cmovCCl(cond, Operand(src), dest); 1240 } 1241 1242 void MacroAssembler::spectreZeroRegister(Condition cond, Register scratch, 1243 Register dest) { 1244 // Note: use movl instead of move32/xorl to ensure flags are not clobbered. 1245 movl(Imm32(0), scratch); 1246 spectreMovePtr(cond, scratch, dest); 1247 } 1248 1249 // ======================================================================== 1250 // Memory access primitives. 
1251 FaultingCodeOffset MacroAssembler::storeDouble(FloatRegister src, 1252 const Address& dest) { 1253 FaultingCodeOffset fco = FaultingCodeOffset(currentOffset()); 1254 vmovsd(src, dest); 1255 return fco; 1256 } 1257 FaultingCodeOffset MacroAssembler::storeDouble(FloatRegister src, 1258 const BaseIndex& dest) { 1259 FaultingCodeOffset fco = FaultingCodeOffset(currentOffset()); 1260 vmovsd(src, dest); 1261 return fco; 1262 } 1263 FaultingCodeOffset MacroAssembler::storeDouble(FloatRegister src, 1264 const Operand& dest) { 1265 switch (dest.kind()) { 1266 case Operand::MEM_REG_DISP: 1267 return storeDouble(src, dest.toAddress()); 1268 case Operand::MEM_SCALE: 1269 return storeDouble(src, dest.toBaseIndex()); 1270 default: 1271 MOZ_CRASH("unexpected operand kind"); 1272 } 1273 } 1274 1275 FaultingCodeOffset MacroAssembler::storeFloat32(FloatRegister src, 1276 const Address& dest) { 1277 FaultingCodeOffset fco = FaultingCodeOffset(currentOffset()); 1278 vmovss(src, dest); 1279 return fco; 1280 } 1281 FaultingCodeOffset MacroAssembler::storeFloat32(FloatRegister src, 1282 const BaseIndex& dest) { 1283 FaultingCodeOffset fco = FaultingCodeOffset(currentOffset()); 1284 vmovss(src, dest); 1285 return fco; 1286 } 1287 FaultingCodeOffset MacroAssembler::storeFloat32(FloatRegister src, 1288 const Operand& dest) { 1289 switch (dest.kind()) { 1290 case Operand::MEM_REG_DISP: 1291 return storeFloat32(src, dest.toAddress()); 1292 case Operand::MEM_SCALE: 1293 return storeFloat32(src, dest.toBaseIndex()); 1294 default: 1295 MOZ_CRASH("unexpected operand kind"); 1296 } 1297 } 1298 1299 FaultingCodeOffset MacroAssembler::storeFloat16(FloatRegister src, 1300 const Address& dest, 1301 Register scratch) { 1302 vmovd(src, scratch); 1303 1304 FaultingCodeOffset fco = FaultingCodeOffset(currentOffset()); 1305 movw(scratch, Operand(dest)); 1306 return fco; 1307 } 1308 FaultingCodeOffset MacroAssembler::storeFloat16(FloatRegister src, 1309 const BaseIndex& dest, 1310 Register scratch) { 
1311 vmovd(src, scratch); 1312 1313 FaultingCodeOffset fco = FaultingCodeOffset(currentOffset()); 1314 movw(scratch, Operand(dest)); 1315 return fco; 1316 } 1317 1318 void MacroAssembler::memoryBarrier(MemoryBarrier barrier) { 1319 if (barrier.hasStoreLoad()) { 1320 // This implementation follows Linux. 1321 masm.mfence(); 1322 } 1323 } 1324 1325 // ======================================================================== 1326 // Wasm SIMD 1327 // 1328 // Some parts of the masm API are currently agnostic as to the data's 1329 // interpretation as int or float, despite the Intel architecture having 1330 // separate functional units and sometimes penalizing type-specific instructions 1331 // that operate on data in the "wrong" unit. 1332 // 1333 // For the time being, we always choose the integer interpretation when we are 1334 // forced to choose blind, but whether that is right or wrong depends on the 1335 // application. This applies to moveSimd128, loadConstantSimd128, 1336 // loadUnalignedSimd128, and storeUnalignedSimd128, at least. 1337 // 1338 // SSE4.1 or better is assumed. 1339 // 1340 // The order of operations here follows the header file. 1341 1342 // Moves. See comments above regarding integer operation. 1343 1344 void MacroAssembler::moveSimd128(FloatRegister src, FloatRegister dest) { 1345 MacroAssemblerX86Shared::moveSimd128Int(src, dest); 1346 } 1347 1348 // Constants. See comments above regarding integer operation. 
1349 1350 void MacroAssembler::loadConstantSimd128(const SimdConstant& v, 1351 FloatRegister dest) { 1352 if (v.isFloatingType()) { 1353 loadConstantSimd128Float(v, dest); 1354 } else { 1355 loadConstantSimd128Int(v, dest); 1356 } 1357 } 1358 1359 // Splat 1360 1361 void MacroAssembler::splatX16(Register src, FloatRegister dest) { 1362 MacroAssemblerX86Shared::splatX16(src, dest); 1363 } 1364 1365 void MacroAssembler::splatX8(Register src, FloatRegister dest) { 1366 MacroAssemblerX86Shared::splatX8(src, dest); 1367 } 1368 1369 void MacroAssembler::splatX4(Register src, FloatRegister dest) { 1370 MacroAssemblerX86Shared::splatX4(src, dest); 1371 } 1372 1373 void MacroAssembler::splatX4(FloatRegister src, FloatRegister dest) { 1374 MacroAssemblerX86Shared::splatX4(src, dest); 1375 } 1376 1377 void MacroAssembler::splatX2(FloatRegister src, FloatRegister dest) { 1378 MacroAssemblerX86Shared::splatX2(src, dest); 1379 } 1380 1381 // Extract lane as scalar 1382 1383 void MacroAssembler::extractLaneInt8x16(uint32_t lane, FloatRegister src, 1384 Register dest) { 1385 MacroAssemblerX86Shared::extractLaneInt8x16(src, dest, lane, 1386 SimdSign::Signed); 1387 } 1388 1389 void MacroAssembler::unsignedExtractLaneInt8x16(uint32_t lane, 1390 FloatRegister src, 1391 Register dest) { 1392 MacroAssemblerX86Shared::extractLaneInt8x16(src, dest, lane, 1393 SimdSign::Unsigned); 1394 } 1395 1396 void MacroAssembler::extractLaneInt16x8(uint32_t lane, FloatRegister src, 1397 Register dest) { 1398 MacroAssemblerX86Shared::extractLaneInt16x8(src, dest, lane, 1399 SimdSign::Signed); 1400 } 1401 1402 void MacroAssembler::unsignedExtractLaneInt16x8(uint32_t lane, 1403 FloatRegister src, 1404 Register dest) { 1405 MacroAssemblerX86Shared::extractLaneInt16x8(src, dest, lane, 1406 SimdSign::Unsigned); 1407 } 1408 1409 void MacroAssembler::extractLaneInt32x4(uint32_t lane, FloatRegister src, 1410 Register dest) { 1411 MacroAssemblerX86Shared::extractLaneInt32x4(src, dest, lane); 1412 } 1413 1414 
void MacroAssembler::extractLaneFloat32x4(uint32_t lane, FloatRegister src, 1415 FloatRegister dest) { 1416 MacroAssemblerX86Shared::extractLaneFloat32x4(src, dest, lane); 1417 } 1418 1419 void MacroAssembler::extractLaneFloat64x2(uint32_t lane, FloatRegister src, 1420 FloatRegister dest) { 1421 MacroAssemblerX86Shared::extractLaneFloat64x2(src, dest, lane); 1422 } 1423 1424 // Replace lane value 1425 1426 void MacroAssembler::replaceLaneInt8x16(unsigned lane, FloatRegister lhs, 1427 Register rhs, FloatRegister dest) { 1428 vpinsrb(lane, Operand(rhs), lhs, dest); 1429 } 1430 1431 void MacroAssembler::replaceLaneInt8x16(unsigned lane, Register rhs, 1432 FloatRegister lhsDest) { 1433 vpinsrb(lane, Operand(rhs), lhsDest, lhsDest); 1434 } 1435 1436 void MacroAssembler::replaceLaneInt16x8(unsigned lane, FloatRegister lhs, 1437 Register rhs, FloatRegister dest) { 1438 vpinsrw(lane, Operand(rhs), lhs, dest); 1439 } 1440 1441 void MacroAssembler::replaceLaneInt16x8(unsigned lane, Register rhs, 1442 FloatRegister lhsDest) { 1443 vpinsrw(lane, Operand(rhs), lhsDest, lhsDest); 1444 } 1445 1446 void MacroAssembler::replaceLaneInt32x4(unsigned lane, FloatRegister lhs, 1447 Register rhs, FloatRegister dest) { 1448 vpinsrd(lane, rhs, lhs, dest); 1449 } 1450 1451 void MacroAssembler::replaceLaneInt32x4(unsigned lane, Register rhs, 1452 FloatRegister lhsDest) { 1453 vpinsrd(lane, rhs, lhsDest, lhsDest); 1454 } 1455 1456 void MacroAssembler::replaceLaneFloat32x4(unsigned lane, FloatRegister lhs, 1457 FloatRegister rhs, 1458 FloatRegister dest) { 1459 MacroAssemblerX86Shared::replaceLaneFloat32x4(lane, lhs, rhs, dest); 1460 } 1461 1462 void MacroAssembler::replaceLaneFloat32x4(unsigned lane, FloatRegister rhs, 1463 FloatRegister lhsDest) { 1464 MacroAssemblerX86Shared::replaceLaneFloat32x4(lane, lhsDest, rhs, lhsDest); 1465 } 1466 1467 void MacroAssembler::replaceLaneFloat64x2(unsigned lane, FloatRegister lhs, 1468 FloatRegister rhs, 1469 FloatRegister dest) { 1470 
MacroAssemblerX86Shared::replaceLaneFloat64x2(lane, lhs, rhs, dest); 1471 } 1472 1473 void MacroAssembler::replaceLaneFloat64x2(unsigned lane, FloatRegister rhs, 1474 FloatRegister lhsDest) { 1475 MacroAssemblerX86Shared::replaceLaneFloat64x2(lane, lhsDest, rhs, lhsDest); 1476 } 1477 1478 // Shuffle - permute with immediate indices 1479 1480 void MacroAssembler::shuffleInt8x16(const uint8_t lanes[16], FloatRegister rhs, 1481 FloatRegister lhsDest) { 1482 MacroAssemblerX86Shared::shuffleInt8x16(lhsDest, rhs, lhsDest, lanes); 1483 } 1484 1485 void MacroAssembler::shuffleInt8x16(const uint8_t lanes[16], FloatRegister lhs, 1486 FloatRegister rhs, FloatRegister dest) { 1487 MacroAssemblerX86Shared::shuffleInt8x16(lhs, rhs, dest, lanes); 1488 } 1489 1490 void MacroAssembler::blendInt8x16(const uint8_t lanes[16], FloatRegister lhs, 1491 FloatRegister rhs, FloatRegister dest, 1492 FloatRegister temp) { 1493 MacroAssemblerX86Shared::blendInt8x16(lhs, rhs, dest, temp, lanes); 1494 } 1495 1496 void MacroAssembler::blendInt16x8(const uint16_t lanes[8], FloatRegister lhs, 1497 FloatRegister rhs, FloatRegister dest) { 1498 MacroAssemblerX86Shared::blendInt16x8(lhs, rhs, dest, lanes); 1499 } 1500 1501 void MacroAssembler::laneSelectSimd128(FloatRegister mask, FloatRegister lhs, 1502 FloatRegister rhs, FloatRegister dest) { 1503 MacroAssemblerX86Shared::laneSelectSimd128(mask, lhs, rhs, dest); 1504 } 1505 1506 void MacroAssembler::interleaveHighInt16x8(FloatRegister lhs, FloatRegister rhs, 1507 FloatRegister dest) { 1508 vpunpckhwd(rhs, lhs, dest); 1509 } 1510 1511 void MacroAssembler::interleaveHighInt32x4(FloatRegister lhs, FloatRegister rhs, 1512 FloatRegister dest) { 1513 vpunpckhdq(rhs, lhs, dest); 1514 } 1515 1516 void MacroAssembler::interleaveHighInt64x2(FloatRegister lhs, FloatRegister rhs, 1517 FloatRegister dest) { 1518 vpunpckhqdq(rhs, lhs, dest); 1519 } 1520 1521 void MacroAssembler::interleaveHighInt8x16(FloatRegister lhs, FloatRegister rhs, 1522 FloatRegister dest) 
{ 1523 vpunpckhbw(rhs, lhs, dest); 1524 } 1525 1526 void MacroAssembler::interleaveLowInt16x8(FloatRegister lhs, FloatRegister rhs, 1527 FloatRegister dest) { 1528 vpunpcklwd(rhs, lhs, dest); 1529 } 1530 1531 void MacroAssembler::interleaveLowInt32x4(FloatRegister lhs, FloatRegister rhs, 1532 FloatRegister dest) { 1533 vpunpckldq(rhs, lhs, dest); 1534 } 1535 1536 void MacroAssembler::interleaveLowInt64x2(FloatRegister lhs, FloatRegister rhs, 1537 FloatRegister dest) { 1538 vpunpcklqdq(rhs, lhs, dest); 1539 } 1540 1541 void MacroAssembler::interleaveLowInt8x16(FloatRegister lhs, FloatRegister rhs, 1542 FloatRegister dest) { 1543 vpunpcklbw(rhs, lhs, dest); 1544 } 1545 1546 void MacroAssembler::permuteInt8x16(const uint8_t lanes[16], FloatRegister src, 1547 FloatRegister dest) { 1548 src = moveSimd128IntIfNotAVX(src, dest); 1549 vpshufbSimd128(SimdConstant::CreateX16((const int8_t*)lanes), src, dest); 1550 } 1551 1552 void MacroAssembler::permuteLowInt16x8(const uint16_t lanes[4], 1553 FloatRegister src, FloatRegister dest) { 1554 MOZ_ASSERT(lanes[0] < 4 && lanes[1] < 4 && lanes[2] < 4 && lanes[3] < 4); 1555 vpshuflw(ComputeShuffleMask(lanes[0], lanes[1], lanes[2], lanes[3]), src, 1556 dest); 1557 } 1558 1559 void MacroAssembler::permuteHighInt16x8(const uint16_t lanes[4], 1560 FloatRegister src, FloatRegister dest) { 1561 MOZ_ASSERT(lanes[0] < 4 && lanes[1] < 4 && lanes[2] < 4 && lanes[3] < 4); 1562 vpshufhw(ComputeShuffleMask(lanes[0], lanes[1], lanes[2], lanes[3]), src, 1563 dest); 1564 } 1565 1566 void MacroAssembler::permuteInt32x4(const uint32_t lanes[4], FloatRegister src, 1567 FloatRegister dest) { 1568 vpshufd(ComputeShuffleMask(lanes[0], lanes[1], lanes[2], lanes[3]), src, 1569 dest); 1570 } 1571 1572 void MacroAssembler::concatAndRightShiftSimd128(FloatRegister lhs, 1573 FloatRegister rhs, 1574 FloatRegister dest, 1575 uint32_t shift) { 1576 vpalignr(Operand(rhs), lhs, dest, shift); 1577 } 1578 1579 void MacroAssembler::leftShiftSimd128(Imm32 count, 
FloatRegister src, 1580 FloatRegister dest) { 1581 src = moveSimd128IntIfNotAVX(src, dest); 1582 vpslldq(count, src, dest); 1583 } 1584 1585 void MacroAssembler::rightShiftSimd128(Imm32 count, FloatRegister src, 1586 FloatRegister dest) { 1587 src = moveSimd128IntIfNotAVX(src, dest); 1588 vpsrldq(count, src, dest); 1589 } 1590 1591 // Zero extend int values. 1592 1593 void MacroAssembler::zeroExtend8x16To16x8(FloatRegister src, 1594 FloatRegister dest) { 1595 src = moveSimd128IntIfNotAVX(src, dest); 1596 vpmovzxbw(Operand(src), dest); 1597 } 1598 1599 void MacroAssembler::zeroExtend8x16To32x4(FloatRegister src, 1600 FloatRegister dest) { 1601 src = moveSimd128IntIfNotAVX(src, dest); 1602 vpmovzxbd(Operand(src), dest); 1603 } 1604 1605 void MacroAssembler::zeroExtend8x16To64x2(FloatRegister src, 1606 FloatRegister dest) { 1607 src = moveSimd128IntIfNotAVX(src, dest); 1608 vpmovzxbq(Operand(src), dest); 1609 } 1610 1611 void MacroAssembler::zeroExtend16x8To32x4(FloatRegister src, 1612 FloatRegister dest) { 1613 src = moveSimd128IntIfNotAVX(src, dest); 1614 vpmovzxwd(Operand(src), dest); 1615 } 1616 1617 void MacroAssembler::zeroExtend16x8To64x2(FloatRegister src, 1618 FloatRegister dest) { 1619 src = moveSimd128IntIfNotAVX(src, dest); 1620 vpmovzxwq(Operand(src), dest); 1621 } 1622 1623 void MacroAssembler::zeroExtend32x4To64x2(FloatRegister src, 1624 FloatRegister dest) { 1625 src = moveSimd128IntIfNotAVX(src, dest); 1626 vpmovzxdq(Operand(src), dest); 1627 } 1628 1629 // Reverse bytes in lanes. 1630 1631 void MacroAssembler::reverseInt16x8(FloatRegister src, FloatRegister dest) { 1632 // Byteswap is MOV + PSLLW + PSRLW + POR, a small win over PSHUFB. 
1633 ScratchSimd128Scope scratch(*this); 1634 FloatRegister srcForScratch = moveSimd128IntIfNotAVX(src, scratch); 1635 vpsrlw(Imm32(8), srcForScratch, scratch); 1636 src = moveSimd128IntIfNotAVX(src, dest); 1637 vpsllw(Imm32(8), src, dest); 1638 vpor(scratch, dest, dest); 1639 } 1640 1641 void MacroAssembler::reverseInt32x4(FloatRegister src, FloatRegister dest) { 1642 src = moveSimd128IntIfNotAVX(src, dest); 1643 int8_t lanes[] = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}; 1644 vpshufbSimd128(SimdConstant::CreateX16((const int8_t*)lanes), src, dest); 1645 } 1646 1647 void MacroAssembler::reverseInt64x2(FloatRegister src, FloatRegister dest) { 1648 src = moveSimd128IntIfNotAVX(src, dest); 1649 int8_t lanes[] = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}; 1650 vpshufbSimd128(SimdConstant::CreateX16((const int8_t*)lanes), src, dest); 1651 } 1652 1653 // Any lane true, ie any bit set 1654 1655 void MacroAssembler::anyTrueSimd128(FloatRegister src, Register dest) { 1656 bool destIsZero = maybeEmitSetZeroByteRegister(dest); 1657 1658 vptest(src, src); 1659 emitSet(Condition::NonZero, dest, destIsZero); 1660 } 1661 1662 // All lanes true 1663 1664 void MacroAssembler::allTrueInt8x16(FloatRegister src, Register dest) { 1665 bool destIsZero = maybeEmitSetZeroByteRegister(dest); 1666 1667 ScratchSimd128Scope xtmp(*this); 1668 // xtmp is all-00h 1669 vpxor(xtmp, xtmp, xtmp); 1670 // Set FFh if byte==0 otherwise 00h 1671 // Operand ordering constraint: lhs==output 1672 vpcmpeqb(Operand(src), xtmp, xtmp); 1673 // Check if xtmp is 0. 
1674 vptest(xtmp, xtmp); 1675 emitSet(Condition::Zero, dest, destIsZero); 1676 } 1677 1678 void MacroAssembler::allTrueInt16x8(FloatRegister src, Register dest) { 1679 bool destIsZero = maybeEmitSetZeroByteRegister(dest); 1680 1681 ScratchSimd128Scope xtmp(*this); 1682 // xtmp is all-00h 1683 vpxor(xtmp, xtmp, xtmp); 1684 // Set FFFFh if word==0 otherwise 0000h 1685 // Operand ordering constraint: lhs==output 1686 vpcmpeqw(Operand(src), xtmp, xtmp); 1687 // Check if xtmp is 0. 1688 vptest(xtmp, xtmp); 1689 emitSet(Condition::Zero, dest, destIsZero); 1690 } 1691 1692 void MacroAssembler::allTrueInt32x4(FloatRegister src, Register dest) { 1693 bool destIsZero = maybeEmitSetZeroByteRegister(dest); 1694 1695 ScratchSimd128Scope xtmp(*this); 1696 // xtmp is all-00h 1697 vpxor(xtmp, xtmp, xtmp); 1698 // Set FFFFFFFFh if doubleword==0 otherwise 00000000h 1699 // Operand ordering constraint: lhs==output 1700 vpcmpeqd(Operand(src), xtmp, xtmp); 1701 // Check if xtmp is 0. 1702 vptest(xtmp, xtmp); 1703 emitSet(Condition::Zero, dest, destIsZero); 1704 } 1705 1706 void MacroAssembler::allTrueInt64x2(FloatRegister src, Register dest) { 1707 bool destIsZero = maybeEmitSetZeroByteRegister(dest); 1708 1709 ScratchSimd128Scope xtmp(*this); 1710 // xtmp is all-00h 1711 vpxor(xtmp, xtmp, xtmp); 1712 // Set FFFFFFFFFFFFFFFFh if quadword==0 otherwise 0000000000000000h 1713 // Operand ordering constraint: lhs==output 1714 vpcmpeqq(Operand(src), xtmp, xtmp); 1715 // Check if xtmp is 0. 
1716 vptest(xtmp, xtmp); 1717 emitSet(Condition::Zero, dest, destIsZero); 1718 } 1719 1720 // Bitmask 1721 1722 void MacroAssembler::bitmaskInt8x16(FloatRegister src, Register dest) { 1723 vpmovmskb(src, dest); 1724 } 1725 1726 void MacroAssembler::bitmaskInt16x8(FloatRegister src, Register dest) { 1727 ScratchSimd128Scope scratch(*this); 1728 // A three-instruction sequence is possible by using scratch as a don't-care 1729 // input and shifting rather than masking at the end, but creates a false 1730 // dependency on the old value of scratch. The better fix is to allow src to 1731 // be clobbered. 1732 src = moveSimd128IntIfNotAVX(src, scratch); 1733 vpacksswb(Operand(src), src, scratch); 1734 vpmovmskb(scratch, dest); 1735 andl(Imm32(0xFF), dest); 1736 } 1737 1738 void MacroAssembler::bitmaskInt32x4(FloatRegister src, Register dest) { 1739 vmovmskps(src, dest); 1740 } 1741 1742 void MacroAssembler::bitmaskInt64x2(FloatRegister src, Register dest) { 1743 vmovmskpd(src, dest); 1744 } 1745 1746 // Swizzle - permute with variable indices 1747 1748 void MacroAssembler::swizzleInt8x16(FloatRegister lhs, FloatRegister rhs, 1749 FloatRegister dest) { 1750 ScratchSimd128Scope scratch(*this); 1751 rhs = moveSimd128IntIfNotAVX(rhs, scratch); 1752 // Set high bit to 1 for values > 15 via adding with saturation. 
1753 vpaddusbSimd128(SimdConstant::SplatX16(0x70), rhs, scratch); 1754 vpshufb(scratch, lhs, dest); // permute 1755 } 1756 1757 void MacroAssembler::swizzleInt8x16Relaxed(FloatRegister lhs, FloatRegister rhs, 1758 FloatRegister dest) { 1759 vpshufb(rhs, lhs, dest); 1760 } 1761 1762 // Integer Add 1763 1764 void MacroAssembler::addInt8x16(FloatRegister lhs, FloatRegister rhs, 1765 FloatRegister dest) { 1766 vpaddb(Operand(rhs), lhs, dest); 1767 } 1768 1769 void MacroAssembler::addInt8x16(FloatRegister lhs, const SimdConstant& rhs, 1770 FloatRegister dest) { 1771 binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddb, 1772 &MacroAssembler::vpaddbSimd128); 1773 } 1774 1775 void MacroAssembler::addInt16x8(FloatRegister lhs, FloatRegister rhs, 1776 FloatRegister dest) { 1777 vpaddw(Operand(rhs), lhs, dest); 1778 } 1779 1780 void MacroAssembler::addInt16x8(FloatRegister lhs, const SimdConstant& rhs, 1781 FloatRegister dest) { 1782 binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddw, 1783 &MacroAssembler::vpaddwSimd128); 1784 } 1785 1786 void MacroAssembler::addInt32x4(FloatRegister lhs, FloatRegister rhs, 1787 FloatRegister dest) { 1788 vpaddd(Operand(rhs), lhs, dest); 1789 } 1790 1791 void MacroAssembler::addInt32x4(FloatRegister lhs, const SimdConstant& rhs, 1792 FloatRegister dest) { 1793 binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddd, 1794 &MacroAssembler::vpadddSimd128); 1795 } 1796 1797 void MacroAssembler::addInt64x2(FloatRegister lhs, FloatRegister rhs, 1798 FloatRegister dest) { 1799 vpaddq(Operand(rhs), lhs, dest); 1800 } 1801 1802 void MacroAssembler::addInt64x2(FloatRegister lhs, const SimdConstant& rhs, 1803 FloatRegister dest) { 1804 binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddq, 1805 &MacroAssembler::vpaddqSimd128); 1806 } 1807 1808 // Integer subtract 1809 1810 void MacroAssembler::subInt8x16(FloatRegister lhs, FloatRegister rhs, 1811 FloatRegister dest) { 1812 vpsubb(Operand(rhs), lhs, dest); 1813 } 1814 1815 void 
MacroAssembler::subInt8x16(FloatRegister lhs, const SimdConstant& rhs, 1816 FloatRegister dest) { 1817 binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubb, 1818 &MacroAssembler::vpsubbSimd128); 1819 } 1820 1821 void MacroAssembler::subInt16x8(FloatRegister lhs, FloatRegister rhs, 1822 FloatRegister dest) { 1823 vpsubw(Operand(rhs), lhs, dest); 1824 } 1825 1826 void MacroAssembler::subInt16x8(FloatRegister lhs, const SimdConstant& rhs, 1827 FloatRegister dest) { 1828 binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubw, 1829 &MacroAssembler::vpsubwSimd128); 1830 } 1831 1832 void MacroAssembler::subInt32x4(FloatRegister lhs, FloatRegister rhs, 1833 FloatRegister dest) { 1834 vpsubd(Operand(rhs), lhs, dest); 1835 } 1836 1837 void MacroAssembler::subInt32x4(FloatRegister lhs, const SimdConstant& rhs, 1838 FloatRegister dest) { 1839 binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubd, 1840 &MacroAssembler::vpsubdSimd128); 1841 } 1842 1843 void MacroAssembler::subInt64x2(FloatRegister lhs, FloatRegister rhs, 1844 FloatRegister dest) { 1845 vpsubq(Operand(rhs), lhs, dest); 1846 } 1847 1848 void MacroAssembler::subInt64x2(FloatRegister lhs, const SimdConstant& rhs, 1849 FloatRegister dest) { 1850 binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubq, 1851 &MacroAssembler::vpsubqSimd128); 1852 } 1853 1854 // Integer multiply 1855 1856 void MacroAssembler::mulInt16x8(FloatRegister lhs, FloatRegister rhs, 1857 FloatRegister dest) { 1858 vpmullw(Operand(rhs), lhs, dest); 1859 } 1860 1861 void MacroAssembler::mulInt16x8(FloatRegister lhs, const SimdConstant& rhs, 1862 FloatRegister dest) { 1863 binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmullw, 1864 &MacroAssembler::vpmullwSimd128); 1865 } 1866 1867 void MacroAssembler::mulInt32x4(FloatRegister lhs, FloatRegister rhs, 1868 FloatRegister dest) { 1869 vpmulld(Operand(rhs), lhs, dest); 1870 } 1871 1872 void MacroAssembler::mulInt32x4(FloatRegister lhs, const SimdConstant& rhs, 1873 FloatRegister dest) { 1874 
binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmulld, 1875 &MacroAssembler::vpmulldSimd128); 1876 } 1877 1878 void MacroAssembler::mulInt64x2(FloatRegister lhs, FloatRegister rhs, 1879 FloatRegister dest, FloatRegister temp) { 1880 ScratchSimd128Scope temp2(*this); 1881 // lhs = <D C> <B A> 1882 // rhs = <H G> <F E> 1883 // result = <(DG+CH)_low+CG_high CG_low> <(BE+AF)_low+AE_high AE_low> 1884 FloatRegister lhsForTemp = 1885 moveSimd128IntIfNotAVX(lhs, temp); // temp = <D C> <B A> 1886 vpsrlq(Imm32(32), lhsForTemp, temp); // temp = <0 D> <0 B> 1887 vpmuludq(rhs, temp, temp); // temp = <DG> <BE> 1888 FloatRegister rhsForTemp = 1889 moveSimd128IntIfNotAVX(rhs, temp2); // temp2 = <H G> <F E> 1890 vpsrlq(Imm32(32), rhsForTemp, temp2); // temp2 = <0 H> <0 F> 1891 vpmuludq(lhs, temp2, temp2); // temp2 = <CH> <AF> 1892 vpaddq(Operand(temp), temp2, temp2); // temp2 = <DG+CH> <BE+AF> 1893 vpsllq(Imm32(32), temp2, temp2); // temp2 = <(DG+CH)_low 0> 1894 // <(BE+AF)_low 0> 1895 vpmuludq(rhs, lhs, dest); // dest = <CG_high CG_low> 1896 // <AE_high AE_low> 1897 vpaddq(Operand(temp2), dest, dest); // dest = 1898 // <(DG+CH)_low+CG_high CG_low> 1899 // <(BE+AF)_low+AE_high AE_low> 1900 } 1901 1902 void MacroAssembler::mulInt64x2(FloatRegister lhs, const SimdConstant& rhs, 1903 FloatRegister dest, FloatRegister temp) { 1904 // Check if we can specialize that to less than eight instructions 1905 // (in comparison with the above mulInt64x2 version). 
1906 const int64_t* c = static_cast<const int64_t*>(rhs.bytes()); 1907 const int64_t val = c[0]; 1908 if (val == c[1]) { 1909 switch (mozilla::CountPopulation64(val)) { 1910 case 0: // val == 0 1911 vpxor(Operand(dest), dest, dest); 1912 return; 1913 case 64: // val == -1 1914 negInt64x2(lhs, dest); 1915 return; 1916 case 1: // val == power of 2 1917 if (val == 1) { 1918 moveSimd128Int(lhs, dest); 1919 } else { 1920 lhs = moveSimd128IntIfNotAVX(lhs, dest); 1921 vpsllq(Imm32(mozilla::CountTrailingZeroes64(val)), lhs, dest); 1922 } 1923 return; 1924 case 2: { 1925 // Constants with 2 bits set, such as 3, 5, 10, etc. 1926 int i0 = mozilla::CountTrailingZeroes64(val); 1927 int i1 = mozilla::CountTrailingZeroes64(val & (val - 1)); 1928 FloatRegister lhsForTemp = moveSimd128IntIfNotAVX(lhs, temp); 1929 vpsllq(Imm32(i1), lhsForTemp, temp); 1930 lhs = moveSimd128IntIfNotAVX(lhs, dest); 1931 if (i0 > 0) { 1932 vpsllq(Imm32(i0), lhs, dest); 1933 lhs = dest; 1934 } 1935 vpaddq(Operand(temp), lhs, dest); 1936 return; 1937 } 1938 case 63: { 1939 // Some constants with 1 bit unset, such as -2, -3, -5, etc. 
1940 FloatRegister lhsForTemp = moveSimd128IntIfNotAVX(lhs, temp); 1941 vpsllq(Imm32(mozilla::CountTrailingZeroes64(~val)), lhsForTemp, temp); 1942 negInt64x2(lhs, dest); 1943 vpsubq(Operand(temp), dest, dest); 1944 return; 1945 } 1946 } 1947 } 1948 1949 // lhs = <D C> <B A> 1950 // rhs = <H G> <F E> 1951 // result = <(DG+CH)_low+CG_high CG_low> <(BE+AF)_low+AE_high AE_low> 1952 1953 if ((c[0] >> 32) == 0 && (c[1] >> 32) == 0) { 1954 // If the H and F == 0, simplify calculations: 1955 // result = <DG_low+CG_high CG_low> <BE_low+AE_high AE_low> 1956 const int64_t rhsShifted[2] = {c[0] << 32, c[1] << 32}; 1957 FloatRegister lhsForTemp = moveSimd128IntIfNotAVX(lhs, temp); 1958 vpmulldSimd128(SimdConstant::CreateSimd128(rhsShifted), lhsForTemp, temp); 1959 vpmuludqSimd128(rhs, lhs, dest); 1960 vpaddq(Operand(temp), dest, dest); 1961 return; 1962 } 1963 1964 const int64_t rhsSwapped[2] = { 1965 static_cast<int64_t>(static_cast<uint64_t>(c[0]) >> 32) | (c[0] << 32), 1966 static_cast<int64_t>(static_cast<uint64_t>(c[1]) >> 32) | (c[1] << 32), 1967 }; // rhsSwapped = <G H> <E F> 1968 FloatRegister lhsForTemp = moveSimd128IntIfNotAVX(lhs, temp); 1969 vpmulldSimd128(SimdConstant::CreateSimd128(rhsSwapped), lhsForTemp, 1970 temp); // temp = <DG CH> <BE AF> 1971 vphaddd(Operand(temp), temp, temp); // temp = <xx xx> <DG+CH BE+AF> 1972 vpmovzxdq(Operand(temp), temp); // temp = <0 DG+CG> <0 BE+AF> 1973 vpmuludqSimd128(rhs, lhs, dest); // dest = <CG_high CG_low> 1974 // <AE_high AE_low> 1975 vpsllq(Imm32(32), temp, temp); // temp = <(DG+CH)_low 0> 1976 // <(BE+AF)_low 0> 1977 vpaddq(Operand(temp), dest, dest); 1978 } 1979 1980 // Code generation from the PR: https://github.com/WebAssembly/simd/pull/376. 1981 // The double PSHUFD for the 32->64 case is not great, and there's some 1982 // discussion on the PR (scroll down far enough) on how to avoid one of them, 1983 // but we need benchmarking + correctness proofs. 
1984 1985 void MacroAssembler::extMulLowInt8x16(FloatRegister lhs, FloatRegister rhs, 1986 FloatRegister dest) { 1987 ScratchSimd128Scope scratch(*this); 1988 widenLowInt8x16(rhs, scratch); 1989 widenLowInt8x16(lhs, dest); 1990 mulInt16x8(dest, scratch, dest); 1991 } 1992 1993 void MacroAssembler::extMulHighInt8x16(FloatRegister lhs, FloatRegister rhs, 1994 FloatRegister dest) { 1995 ScratchSimd128Scope scratch(*this); 1996 widenHighInt8x16(rhs, scratch); 1997 widenHighInt8x16(lhs, dest); 1998 mulInt16x8(dest, scratch, dest); 1999 } 2000 2001 void MacroAssembler::unsignedExtMulLowInt8x16(FloatRegister lhs, 2002 FloatRegister rhs, 2003 FloatRegister dest) { 2004 ScratchSimd128Scope scratch(*this); 2005 unsignedWidenLowInt8x16(rhs, scratch); 2006 unsignedWidenLowInt8x16(lhs, dest); 2007 mulInt16x8(dest, scratch, dest); 2008 } 2009 2010 void MacroAssembler::unsignedExtMulHighInt8x16(FloatRegister lhs, 2011 FloatRegister rhs, 2012 FloatRegister dest) { 2013 ScratchSimd128Scope scratch(*this); 2014 unsignedWidenHighInt8x16(rhs, scratch); 2015 unsignedWidenHighInt8x16(lhs, dest); 2016 mulInt16x8(dest, scratch, dest); 2017 } 2018 2019 void MacroAssembler::extMulLowInt16x8(FloatRegister lhs, FloatRegister rhs, 2020 FloatRegister dest) { 2021 ScratchSimd128Scope scratch(*this); 2022 FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch); 2023 vpmulhw(Operand(rhs), lhsCopy, scratch); 2024 vpmullw(Operand(rhs), lhs, dest); 2025 vpunpcklwd(scratch, dest, dest); 2026 } 2027 2028 void MacroAssembler::extMulHighInt16x8(FloatRegister lhs, FloatRegister rhs, 2029 FloatRegister dest) { 2030 ScratchSimd128Scope scratch(*this); 2031 FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch); 2032 vpmulhw(Operand(rhs), lhsCopy, scratch); 2033 vpmullw(Operand(rhs), lhs, dest); 2034 vpunpckhwd(scratch, dest, dest); 2035 } 2036 2037 void MacroAssembler::unsignedExtMulLowInt16x8(FloatRegister lhs, 2038 FloatRegister rhs, 2039 FloatRegister dest) { 2040 ScratchSimd128Scope 
scratch(*this);
  FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch);
  // scratch = high halves, dest = low halves of the unsigned 16x16->32
  // products; interleaving the low eight half-products yields the low four
  // full 32-bit products.
  vpmulhuw(Operand(rhs), lhsCopy, scratch);
  vpmullw(Operand(rhs), lhs, dest);
  vpunpcklwd(scratch, dest, dest);
}

// Unsigned i16x8 extended multiply, high lanes: same low/high half-product
// scheme as the Low variant, but interleave the upper eight half-products.
void MacroAssembler::unsignedExtMulHighInt16x8(FloatRegister lhs,
                                               FloatRegister rhs,
                                               FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch);
  vpmulhuw(Operand(rhs), lhsCopy, scratch);
  vpmullw(Operand(rhs), lhs, dest);
  vpunpckhwd(scratch, dest, dest);
}

// Signed i32x4 extended multiply, low lanes: shuffle lanes 0 and 1 into the
// even 32-bit positions read by vpmuldq, then do two 32x32->64 multiplies.
void MacroAssembler::extMulLowInt32x4(FloatRegister lhs, FloatRegister rhs,
                                      FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  vpshufd(ComputeShuffleMask(0, 0, 1, 0), lhs, scratch);
  vpshufd(ComputeShuffleMask(0, 0, 1, 0), rhs, dest);
  vpmuldq(scratch, dest, dest);
}

// Signed i32x4 extended multiply, high lanes (lanes 2 and 3).
void MacroAssembler::extMulHighInt32x4(FloatRegister lhs, FloatRegister rhs,
                                       FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  vpshufd(ComputeShuffleMask(2, 0, 3, 0), lhs, scratch);
  vpshufd(ComputeShuffleMask(2, 0, 3, 0), rhs, dest);
  vpmuldq(scratch, dest, dest);
}

// Unsigned variants of the above; identical shuffles, vpmuludq instead of
// vpmuldq.
void MacroAssembler::unsignedExtMulLowInt32x4(FloatRegister lhs,
                                              FloatRegister rhs,
                                              FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  vpshufd(ComputeShuffleMask(0, 0, 1, 0), lhs, scratch);
  vpshufd(ComputeShuffleMask(0, 0, 1, 0), rhs, dest);
  vpmuludq(Operand(scratch), dest, dest);
}

void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister lhs,
                                               FloatRegister rhs,
                                               FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  vpshufd(ComputeShuffleMask(2, 0, 3, 0), lhs, scratch);
  vpshufd(ComputeShuffleMask(2, 0, 3, 0), rhs, dest);
  vpmuludq(Operand(scratch), dest, dest);
}

// Q15 rounding, saturating multiply (wasm i16x8.q15mulr_sat_s).
void MacroAssembler::q15MulrSatInt16x8(FloatRegister lhs, FloatRegister rhs,
                                       FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  vpmulhrsw(Operand(rhs), lhs, dest);
  // vpmulhrsw leaves 0x8000 in a lane only in the overflow case
  // (INT16_MIN * INT16_MIN); wasm requires saturation to INT16_MAX there.
  // XORing exactly those lanes with an all-ones compare mask turns 0x8000
  // into 0x7FFF and leaves every other lane unchanged.
  FloatRegister destCopy = moveSimd128IntIfNotAVX(dest, scratch);
  vpcmpeqwSimd128(SimdConstant::SplatX8(0x8000), destCopy, scratch);
  vpxor(scratch, dest, dest);
}

// Relaxed variant: bare vpmulhrsw, no overflow fix-up.
void MacroAssembler::q15MulrInt16x8Relaxed(FloatRegister lhs, FloatRegister rhs,
                                           FloatRegister dest) {
  vpmulhrsw(Operand(rhs), lhs, dest);
}

// Integer negate
//
// Each negation computes dest = 0 - src. If src aliases dest it is first
// saved in the scratch register so that zeroing dest does not clobber it.

void MacroAssembler::negInt8x16(FloatRegister src, FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  if (src == dest) {
    moveSimd128Int(src, scratch);
    src = scratch;
  }
  vpxor(Operand(dest), dest, dest);
  vpsubb(Operand(src), dest, dest);
}

void MacroAssembler::negInt16x8(FloatRegister src, FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  if (src == dest) {
    moveSimd128Int(src, scratch);
    src = scratch;
  }
  vpxor(Operand(dest), dest, dest);
  vpsubw(Operand(src), dest, dest);
}

void MacroAssembler::negInt32x4(FloatRegister src, FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  if (src == dest) {
    moveSimd128Int(src, scratch);
    src = scratch;
  }
  vpxor(Operand(dest), dest, dest);
  vpsubd(Operand(src), dest, dest);
}

void MacroAssembler::negInt64x2(FloatRegister src, FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  if (src == dest) {
    moveSimd128Int(src, scratch);
    src = scratch;
  }
  vpxor(Operand(dest), dest, dest);
  vpsubq(Operand(src), dest, dest);
}

// Saturating integer add
//
// Each operation has a register-rhs form and a constant-rhs form; the latter
// delegates to binarySimd128 with the register-operand emitter and the
// constant-operand emitter for the same instruction.

void MacroAssembler::addSatInt8x16(FloatRegister lhs, FloatRegister rhs,
                                   FloatRegister dest) {
  vpaddsb(Operand(rhs), lhs, dest);
}

void MacroAssembler::addSatInt8x16(FloatRegister lhs, const SimdConstant& rhs,
                                   FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddsb,
                &MacroAssembler::vpaddsbSimd128);
}

void MacroAssembler::unsignedAddSatInt8x16(FloatRegister lhs, FloatRegister rhs,
                                           FloatRegister dest) {
  vpaddusb(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedAddSatInt8x16(FloatRegister lhs,
                                           const SimdConstant& rhs,
                                           FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddusb,
                &MacroAssembler::vpaddusbSimd128);
}

void MacroAssembler::addSatInt16x8(FloatRegister lhs, FloatRegister rhs,
                                   FloatRegister dest) {
  vpaddsw(Operand(rhs), lhs, dest);
}

void MacroAssembler::addSatInt16x8(FloatRegister lhs, const SimdConstant& rhs,
                                   FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddsw,
                &MacroAssembler::vpaddswSimd128);
}

void MacroAssembler::unsignedAddSatInt16x8(FloatRegister lhs, FloatRegister rhs,
                                           FloatRegister dest) {
  vpaddusw(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedAddSatInt16x8(FloatRegister lhs,
                                           const SimdConstant& rhs,
                                           FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddusw,
                &MacroAssembler::vpadduswSimd128);
}

// Saturating integer subtract

void MacroAssembler::subSatInt8x16(FloatRegister lhs, FloatRegister rhs,
                                   FloatRegister dest) {
  vpsubsb(Operand(rhs), lhs, dest);
}

void MacroAssembler::subSatInt8x16(FloatRegister lhs, const SimdConstant& rhs,
                                   FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubsb,
                &MacroAssembler::vpsubsbSimd128);
}

void MacroAssembler::unsignedSubSatInt8x16(FloatRegister lhs, FloatRegister rhs,
                                           FloatRegister dest) {
  vpsubusb(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedSubSatInt8x16(FloatRegister lhs,
                                           const SimdConstant& rhs,
                                           FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubusb,
                &MacroAssembler::vpsubusbSimd128);
}

void MacroAssembler::subSatInt16x8(FloatRegister lhs, FloatRegister rhs,
                                   FloatRegister dest) {
  vpsubsw(Operand(rhs), lhs, dest);
}

void MacroAssembler::subSatInt16x8(FloatRegister lhs, const SimdConstant& rhs,
                                   FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubsw,
                &MacroAssembler::vpsubswSimd128);
}

void MacroAssembler::unsignedSubSatInt16x8(FloatRegister lhs, FloatRegister rhs,
                                           FloatRegister dest) {
  vpsubusw(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedSubSatInt16x8(FloatRegister lhs,
                                           const SimdConstant& rhs,
                                           FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubusw,
                &MacroAssembler::vpsubuswSimd128);
}

// Lane-wise integer minimum

void MacroAssembler::minInt8x16(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpminsb(Operand(rhs), lhs, dest);
}

void MacroAssembler::minInt8x16(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpminsb,
                &MacroAssembler::vpminsbSimd128);
}

void MacroAssembler::unsignedMinInt8x16(FloatRegister lhs, FloatRegister rhs,
                                        FloatRegister dest) {
  vpminub(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedMinInt8x16(FloatRegister lhs,
                                        const SimdConstant& rhs,
                                        FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpminub,
                &MacroAssembler::vpminubSimd128);
}

void MacroAssembler::minInt16x8(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpminsw(Operand(rhs), lhs, dest);
}

void MacroAssembler::minInt16x8(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpminsw,
&MacroAssembler::vpminswSimd128);
}

void MacroAssembler::unsignedMinInt16x8(FloatRegister lhs, FloatRegister rhs,
                                        FloatRegister dest) {
  vpminuw(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedMinInt16x8(FloatRegister lhs,
                                        const SimdConstant& rhs,
                                        FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpminuw,
                &MacroAssembler::vpminuwSimd128);
}

void MacroAssembler::minInt32x4(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpminsd(Operand(rhs), lhs, dest);
}

void MacroAssembler::minInt32x4(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpminsd,
                &MacroAssembler::vpminsdSimd128);
}

void MacroAssembler::unsignedMinInt32x4(FloatRegister lhs, FloatRegister rhs,
                                        FloatRegister dest) {
  vpminud(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedMinInt32x4(FloatRegister lhs,
                                        const SimdConstant& rhs,
                                        FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpminud,
                &MacroAssembler::vpminudSimd128);
}

// Lane-wise integer maximum
//
// Same structure as the minimum family above: a register-rhs form and a
// constant-rhs form that routes through binarySimd128.

void MacroAssembler::maxInt8x16(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpmaxsb(Operand(rhs), lhs, dest);
}

void MacroAssembler::maxInt8x16(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmaxsb,
                &MacroAssembler::vpmaxsbSimd128);
}

void MacroAssembler::unsignedMaxInt8x16(FloatRegister lhs, FloatRegister rhs,
                                        FloatRegister dest) {
  vpmaxub(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedMaxInt8x16(FloatRegister lhs,
                                        const SimdConstant& rhs,
                                        FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmaxub,
                &MacroAssembler::vpmaxubSimd128);
}

void MacroAssembler::maxInt16x8(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpmaxsw(Operand(rhs), lhs, dest);
}

void MacroAssembler::maxInt16x8(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmaxsw,
                &MacroAssembler::vpmaxswSimd128);
}

void MacroAssembler::unsignedMaxInt16x8(FloatRegister lhs, FloatRegister rhs,
                                        FloatRegister dest) {
  vpmaxuw(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedMaxInt16x8(FloatRegister lhs,
                                        const SimdConstant& rhs,
                                        FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmaxuw,
                &MacroAssembler::vpmaxuwSimd128);
}

void MacroAssembler::maxInt32x4(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpmaxsd(Operand(rhs), lhs, dest);
}

void MacroAssembler::maxInt32x4(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmaxsd,
                &MacroAssembler::vpmaxsdSimd128);
}

void MacroAssembler::unsignedMaxInt32x4(FloatRegister lhs, FloatRegister rhs,
                                        FloatRegister dest) {
  vpmaxud(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedMaxInt32x4(FloatRegister lhs,
                                        const SimdConstant& rhs,
                                        FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmaxud,
                &MacroAssembler::vpmaxudSimd128);
}

// Lane-wise integer rounding average

void MacroAssembler::unsignedAverageInt8x16(FloatRegister lhs,
                                            FloatRegister rhs,
                                            FloatRegister dest) {
  vpavgb(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedAverageInt16x8(FloatRegister lhs,
                                            FloatRegister rhs,
                                            FloatRegister dest) {
  vpavgw(Operand(rhs), lhs, dest);
}

// Lane-wise integer absolute value

void MacroAssembler::absInt8x16(FloatRegister src, FloatRegister dest) {
  vpabsb(Operand(src), dest);
}

void MacroAssembler::absInt16x8(FloatRegister src, FloatRegister dest) {
  vpabsw(Operand(src), dest);
}

void MacroAssembler::absInt32x4(FloatRegister src, FloatRegister dest) {
  vpabsd(Operand(src), dest);
}

// There is no 64-bit vpabsq outside AVX-512, so compute |x| with the
// classic mask trick: m = x >> 63 (sign replicated); |x| = (x ^ m) - m.
void MacroAssembler::absInt64x2(FloatRegister src, FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  signReplicationInt64x2(src, scratch);
  src = moveSimd128IntIfNotAVX(src, dest);
  vpxor(Operand(scratch), src, dest);
  vpsubq(Operand(scratch), dest, dest);
}

// Left shift by scalar
//
// Register-count shifts and the 8-bit lane shifts (which have no native
// instruction) are delegated to MacroAssemblerX86Shared helpers; the
// immediate-count 16/32/64-bit shifts emit the shift directly.

void MacroAssembler::leftShiftInt8x16(Register rhs, FloatRegister lhsDest,
                                      FloatRegister temp) {
  MacroAssemblerX86Shared::packedLeftShiftByScalarInt8x16(lhsDest, rhs, temp,
                                                          lhsDest);
}

void MacroAssembler::leftShiftInt8x16(Imm32 count, FloatRegister src,
                                      FloatRegister dest) {
  MacroAssemblerX86Shared::packedLeftShiftByScalarInt8x16(count, src, dest);
}

void MacroAssembler::leftShiftInt16x8(Register rhs, FloatRegister lhsDest) {
  MacroAssemblerX86Shared::packedLeftShiftByScalarInt16x8(lhsDest, rhs,
                                                          lhsDest);
}

void MacroAssembler::leftShiftInt16x8(Imm32 count, FloatRegister src,
                                      FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpsllw(count, src, dest);
}

void MacroAssembler::leftShiftInt32x4(Register rhs, FloatRegister lhsDest) {
  MacroAssemblerX86Shared::packedLeftShiftByScalarInt32x4(lhsDest, rhs,
                                                          lhsDest);
}

void MacroAssembler::leftShiftInt32x4(Imm32 count, FloatRegister src,
                                      FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpslld(count, src, dest);
}

void MacroAssembler::leftShiftInt64x2(Register rhs, FloatRegister lhsDest) {
  MacroAssemblerX86Shared::packedLeftShiftByScalarInt64x2(lhsDest, rhs,
                                                          lhsDest);
}

void MacroAssembler::leftShiftInt64x2(Imm32 count, FloatRegister src,
                                      FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpsllq(count, src, dest);
}

// Right shift by scalar
//
// Arithmetic (sign-preserving) and unsigned (logical) variants; the 8-bit
// and 64-bit-arithmetic cases have no direct SSE instruction and go through
// the shared helpers, which may need a temp register.

void MacroAssembler::rightShiftInt8x16(Register rhs, FloatRegister lhsDest,
                                       FloatRegister temp) {
  MacroAssemblerX86Shared::packedRightShiftByScalarInt8x16(lhsDest, rhs, temp,
                                                           lhsDest);
}

void MacroAssembler::rightShiftInt8x16(Imm32 count, FloatRegister src,
                                       FloatRegister dest) {
  MacroAssemblerX86Shared::packedRightShiftByScalarInt8x16(count, src, dest);
}

void MacroAssembler::unsignedRightShiftInt8x16(Register rhs,
                                               FloatRegister lhsDest,
                                               FloatRegister temp) {
  MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt8x16(
      lhsDest, rhs, temp, lhsDest);
}

void MacroAssembler::unsignedRightShiftInt8x16(Imm32 count, FloatRegister src,
                                               FloatRegister dest) {
  MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt8x16(count, src,
                                                                   dest);
}

void MacroAssembler::rightShiftInt16x8(Register rhs, FloatRegister lhsDest) {
  MacroAssemblerX86Shared::packedRightShiftByScalarInt16x8(lhsDest, rhs,
                                                           lhsDest);
}

void MacroAssembler::rightShiftInt16x8(Imm32 count, FloatRegister src,
                                       FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpsraw(count, src, dest);
}

void MacroAssembler::unsignedRightShiftInt16x8(Register rhs,
                                               FloatRegister lhsDest) {
  MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt16x8(lhsDest, rhs,
                                                                   lhsDest);
}

void MacroAssembler::unsignedRightShiftInt16x8(Imm32 count, FloatRegister src,
                                               FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpsrlw(count, src, dest);
}

void
MacroAssembler::rightShiftInt32x4(Register rhs, FloatRegister lhsDest) {
  MacroAssemblerX86Shared::packedRightShiftByScalarInt32x4(lhsDest, rhs,
                                                           lhsDest);
}

void MacroAssembler::rightShiftInt32x4(Imm32 count, FloatRegister src,
                                       FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpsrad(count, src, dest);
}

void MacroAssembler::unsignedRightShiftInt32x4(Register rhs,
                                               FloatRegister lhsDest) {
  MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt32x4(lhsDest, rhs,
                                                                   lhsDest);
}

void MacroAssembler::unsignedRightShiftInt32x4(Imm32 count, FloatRegister src,
                                               FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpsrld(count, src, dest);
}

// Arithmetic 64-bit right shift has no SSE/AVX instruction; both forms go
// through the shared helper (the register form needs a temp).
void MacroAssembler::rightShiftInt64x2(Register rhs, FloatRegister lhsDest,
                                       FloatRegister temp) {
  MacroAssemblerX86Shared::packedRightShiftByScalarInt64x2(lhsDest, rhs, temp,
                                                           lhsDest);
}

void MacroAssembler::rightShiftInt64x2(Imm32 count, FloatRegister src,
                                       FloatRegister dest) {
  MacroAssemblerX86Shared::packedRightShiftByScalarInt64x2(count, src, dest);
}

void MacroAssembler::unsignedRightShiftInt64x2(Register rhs,
                                               FloatRegister lhsDest) {
  MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt64x2(lhsDest, rhs,
                                                                   lhsDest);
}

void MacroAssembler::unsignedRightShiftInt64x2(Imm32 count, FloatRegister src,
                                               FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpsrlq(count, src, dest);
}

// Sign replication operation
//
// dest gets, in every lane, all-ones if the lane of src is negative and
// all-zeroes otherwise.

void MacroAssembler::signReplicationInt8x16(FloatRegister src,
                                            FloatRegister dest) {
  // dest is zeroed first, so src must live elsewhere.
  MOZ_ASSERT(src != dest);
  vpxor(Operand(dest), dest, dest);
  // 0 > src[i] is true exactly for negative lanes.
  vpcmpgtb(Operand(src), dest, dest);
}

void MacroAssembler::signReplicationInt16x8(FloatRegister src,
                                            FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  // Arithmetic shift by (lane width - 1) smears the sign bit.
  vpsraw(Imm32(15), src, dest);
}

void MacroAssembler::signReplicationInt32x4(FloatRegister src,
                                            FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpsrad(Imm32(31), src, dest);
}

void MacroAssembler::signReplicationInt64x2(FloatRegister src,
                                            FloatRegister dest) {
  // No 64-bit arithmetic shift: duplicate each quadword's high dword into
  // both halves, then smear its sign bit across the 32-bit lanes.
  vpshufd(ComputeShuffleMask(1, 1, 3, 3), src, dest);
  vpsrad(Imm32(31), dest, dest);
}

// Bitwise and, or, xor, not

void MacroAssembler::bitwiseAndSimd128(FloatRegister rhs,
                                       FloatRegister lhsDest) {
  vpand(Operand(rhs), lhsDest, lhsDest);
}

void MacroAssembler::bitwiseAndSimd128(FloatRegister lhs, FloatRegister rhs,
                                       FloatRegister dest) {
  vpand(Operand(rhs), lhs, dest);
}

void MacroAssembler::bitwiseAndSimd128(FloatRegister lhs,
                                       const SimdConstant& rhs,
                                       FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpand,
                &MacroAssembler::vpandSimd128);
}

void MacroAssembler::bitwiseOrSimd128(FloatRegister rhs,
                                      FloatRegister lhsDest) {
  vpor(Operand(rhs), lhsDest, lhsDest);
}

void MacroAssembler::bitwiseOrSimd128(FloatRegister lhs, FloatRegister rhs,
                                      FloatRegister dest) {
  vpor(Operand(rhs), lhs, dest);
}

void MacroAssembler::bitwiseOrSimd128(FloatRegister lhs,
                                      const SimdConstant& rhs,
                                      FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpor,
                &MacroAssembler::vporSimd128);
}

void MacroAssembler::bitwiseXorSimd128(FloatRegister rhs,
                                       FloatRegister lhsDest) {
  vpxor(Operand(rhs), lhsDest, lhsDest);
}

void MacroAssembler::bitwiseXorSimd128(FloatRegister lhs, FloatRegister rhs,
                                       FloatRegister dest) {
  vpxor(Operand(rhs), lhs, dest);
}

void MacroAssembler::bitwiseXorSimd128(FloatRegister lhs,
                                       const SimdConstant& rhs,
                                       FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpxor,
                &MacroAssembler::vpxorSimd128);
}

// NOT has no dedicated instruction; XOR with all-ones.
void MacroAssembler::bitwiseNotSimd128(FloatRegister src, FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  bitwiseXorSimd128(src, SimdConstant::SplatX16(-1), dest);
}

// Bitwise and-not
//
// Note the x86 operand convention: vpandn computes ~first & second, so the
// operand that gets complemented is the destination-side operand.

void MacroAssembler::bitwiseNotAndSimd128(FloatRegister rhs,
                                          FloatRegister lhsDest) {
  vpandn(Operand(rhs), lhsDest, lhsDest);
}

void MacroAssembler::bitwiseNotAndSimd128(FloatRegister lhs, FloatRegister rhs,
                                          FloatRegister dest) {
  vpandn(Operand(rhs), lhs, dest);
}

// Bitwise select

void MacroAssembler::bitwiseSelectSimd128(FloatRegister mask,
                                          FloatRegister onTrue,
                                          FloatRegister onFalse,
                                          FloatRegister dest,
                                          FloatRegister temp) {
  MacroAssemblerX86Shared::selectSimd128(mask, onTrue, onFalse, temp, dest);
}

// Population count

void MacroAssembler::popcntInt8x16(FloatRegister src, FloatRegister dest,
                                   FloatRegister temp) {
  MacroAssemblerX86Shared::popcntInt8x16(src, temp, dest);
}

// Comparisons (integer and floating-point)
//
// The constant-rhs forms assert that the condition is one the underlying
// helper can encode without swapping operands (no LT/GE for integers).

void MacroAssembler::compareInt8x16(Assembler::Condition cond,
                                    FloatRegister rhs, FloatRegister lhsDest) {
  MacroAssemblerX86Shared::compareInt8x16(lhsDest, Operand(rhs), cond, lhsDest);
}

void MacroAssembler::compareInt8x16(Assembler::Condition cond,
                                    FloatRegister lhs, FloatRegister rhs,
                                    FloatRegister dest) {
  MacroAssemblerX86Shared::compareInt8x16(lhs, Operand(rhs), cond, dest);
}

void MacroAssembler::compareInt8x16(Assembler::Condition cond,
                                    FloatRegister lhs, const SimdConstant& rhs,
                                    FloatRegister dest) {
  MOZ_ASSERT(cond != Assembler::Condition::LessThan &&
             cond != Assembler::Condition::GreaterThanOrEqual);
  MacroAssemblerX86Shared::compareInt8x16(cond, lhs, rhs, dest);
}

void MacroAssembler::compareInt16x8(Assembler::Condition cond,
                                    FloatRegister rhs, FloatRegister lhsDest) {
  MacroAssemblerX86Shared::compareInt16x8(lhsDest, Operand(rhs), cond, lhsDest);
}

void MacroAssembler::compareInt16x8(Assembler::Condition cond,
                                    FloatRegister lhs, FloatRegister rhs,
                                    FloatRegister dest) {
  MacroAssemblerX86Shared::compareInt16x8(lhs, Operand(rhs), cond, dest);
}

void MacroAssembler::compareInt16x8(Assembler::Condition cond,
                                    FloatRegister lhs, const SimdConstant& rhs,
                                    FloatRegister dest) {
  MOZ_ASSERT(cond != Assembler::Condition::LessThan &&
             cond != Assembler::Condition::GreaterThanOrEqual);
  MacroAssemblerX86Shared::compareInt16x8(cond, lhs, rhs, dest);
}

void MacroAssembler::compareInt32x4(Assembler::Condition cond,
                                    FloatRegister rhs, FloatRegister lhsDest) {
  MacroAssemblerX86Shared::compareInt32x4(lhsDest, Operand(rhs), cond, lhsDest);
}

void MacroAssembler::compareInt32x4(Assembler::Condition cond,
                                    FloatRegister lhs, FloatRegister rhs,
                                    FloatRegister dest) {
  MacroAssemblerX86Shared::compareInt32x4(lhs, Operand(rhs), cond, dest);
}

void MacroAssembler::compareInt32x4(Assembler::Condition cond,
                                    FloatRegister lhs, const SimdConstant& rhs,
                                    FloatRegister dest) {
  MOZ_ASSERT(cond != Assembler::Condition::LessThan &&
             cond != Assembler::Condition::GreaterThanOrEqual);
  MacroAssemblerX86Shared::compareInt32x4(cond, lhs, rhs, dest);
}

void MacroAssembler::compareForEqualityInt64x2(Assembler::Condition cond,
                                               FloatRegister lhs,
                                               FloatRegister rhs,
                                               FloatRegister dest) {
  MacroAssemblerX86Shared::compareForEqualityInt64x2(lhs, Operand(rhs), cond,
                                                     dest);
}

// Ordered (LT/GT/LE/GE) 64-bit comparison; the temps are only used on the
// non-AVX fallback path.
void MacroAssembler::compareForOrderingInt64x2(
    Assembler::Condition cond, FloatRegister lhs, FloatRegister rhs,
    FloatRegister dest, FloatRegister temp1, FloatRegister temp2) {
  // With both AVX and SSE4.2 a shorter, temp-free sequence is available.
  if (HasAVX() && HasSSE42()) {
    MacroAssemblerX86Shared::compareForOrderingInt64x2AVX(lhs, rhs, cond, dest);
  } else {
    MacroAssemblerX86Shared::compareForOrderingInt64x2(lhs, Operand(rhs), cond,
                                                       temp1, temp2, dest);
  }
}

void MacroAssembler::compareFloat32x4(Assembler::Condition cond,
                                      FloatRegister rhs,
                                      FloatRegister lhsDest) {
  // Code in the SIMD implementation allows operands to be reversed like this,
  // this benefits the baseline compiler.  Ion takes care of the reversing
  // itself and never generates GT/GE.
  if (cond == Assembler::GreaterThan) {
    MacroAssemblerX86Shared::compareFloat32x4(rhs, Operand(lhsDest),
                                              Assembler::LessThan, lhsDest);
  } else if (cond == Assembler::GreaterThanOrEqual) {
    MacroAssemblerX86Shared::compareFloat32x4(
        rhs, Operand(lhsDest), Assembler::LessThanOrEqual, lhsDest);
  } else {
    MacroAssemblerX86Shared::compareFloat32x4(lhsDest, Operand(rhs), cond,
                                              lhsDest);
  }
}

void MacroAssembler::compareFloat32x4(Assembler::Condition cond,
                                      FloatRegister lhs, FloatRegister rhs,
                                      FloatRegister dest) {
  MacroAssemblerX86Shared::compareFloat32x4(lhs, Operand(rhs), cond, dest);
}

void MacroAssembler::compareFloat32x4(Assembler::Condition cond,
                                      FloatRegister lhs,
                                      const SimdConstant& rhs,
                                      FloatRegister dest) {
  // The constant-rhs path cannot swap operands, so GT/GE are disallowed.
  MOZ_ASSERT(cond != Assembler::Condition::GreaterThan &&
             cond != Assembler::Condition::GreaterThanOrEqual);
  MacroAssemblerX86Shared::compareFloat32x4(cond, lhs, rhs, dest);
}

void MacroAssembler::compareFloat64x2(Assembler::Condition cond,
                                      FloatRegister rhs,
                                      FloatRegister lhsDest) {
  compareFloat64x2(cond, lhsDest, rhs, lhsDest);
}

void MacroAssembler::compareFloat64x2(Assembler::Condition cond,
                                      FloatRegister lhs, FloatRegister rhs,
                                      FloatRegister dest) {
  // Code in the SIMD implementation allows operands to be reversed like this,
  // this benefits the baseline compiler.  Ion takes care of the reversing
  // itself and never generates GT/GE.
  if (cond == Assembler::GreaterThan) {
    MacroAssemblerX86Shared::compareFloat64x2(rhs, Operand(lhs),
                                              Assembler::LessThan, dest);
  } else if (cond == Assembler::GreaterThanOrEqual) {
    MacroAssemblerX86Shared::compareFloat64x2(rhs, Operand(lhs),
                                              Assembler::LessThanOrEqual, dest);
  } else {
    MacroAssemblerX86Shared::compareFloat64x2(lhs, Operand(rhs), cond, dest);
  }
}

void MacroAssembler::compareFloat64x2(Assembler::Condition cond,
                                      FloatRegister lhs,
                                      const SimdConstant& rhs,
                                      FloatRegister dest) {
  MOZ_ASSERT(cond != Assembler::Condition::GreaterThan &&
             cond != Assembler::Condition::GreaterThanOrEqual);
  MacroAssemblerX86Shared::compareFloat64x2(cond, lhs, rhs, dest);
}

// Load.  See comments above regarding integer operation.

void MacroAssembler::loadUnalignedSimd128(const Operand& src,
                                          FloatRegister dest) {
  loadUnalignedSimd128Int(src, dest);
}

FaultingCodeOffset MacroAssembler::loadUnalignedSimd128(const Address& src,
                                                        FloatRegister dest) {
  return loadUnalignedSimd128Int(src, dest);
}

FaultingCodeOffset MacroAssembler::loadUnalignedSimd128(const BaseIndex& src,
                                                        FloatRegister dest) {
  return loadUnalignedSimd128Int(src, dest);
}

// Store.  See comments above regarding integer operation.

FaultingCodeOffset MacroAssembler::storeUnalignedSimd128(FloatRegister src,
                                                         const Address& dest) {
  return storeUnalignedSimd128Int(src, dest);
}

FaultingCodeOffset MacroAssembler::storeUnalignedSimd128(
    FloatRegister src, const BaseIndex& dest) {
  return storeUnalignedSimd128Int(src, dest);
}

// Floating point negation
//
// Negate by flipping only the sign bit: XOR with a splat of -0.0.

void MacroAssembler::negFloat32x4(FloatRegister src, FloatRegister dest) {
  src = moveSimd128FloatIfNotAVX(src, dest);
  bitwiseXorSimd128(src, SimdConstant::SplatX4(-0.f), dest);
}

void MacroAssembler::negFloat64x2(FloatRegister src, FloatRegister dest) {
  src = moveSimd128FloatIfNotAVX(src, dest);
  bitwiseXorSimd128(src, SimdConstant::SplatX2(-0.0), dest);
}

// Floating point absolute value
//
// Clear only the sign bit: AND with an all-bits-but-sign mask.

void MacroAssembler::absFloat32x4(FloatRegister src, FloatRegister dest) {
  src = moveSimd128FloatIfNotAVX(src, dest);
  bitwiseAndSimd128(src, SimdConstant::SplatX4(0x7FFFFFFF), dest);
}

void MacroAssembler::absFloat64x2(FloatRegister src, FloatRegister dest) {
  src = moveSimd128FloatIfNotAVX(src, dest);
  bitwiseAndSimd128(src, SimdConstant::SplatX2(int64_t(0x7FFFFFFFFFFFFFFFll)),
                    dest);
}

// NaN-propagating minimum

void MacroAssembler::minFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest, FloatRegister temp1,
                                  FloatRegister temp2) {
  MacroAssemblerX86Shared::minFloat32x4(lhs, rhs, temp1, temp2, dest);
}

void MacroAssembler::minFloat64x2(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest, FloatRegister temp1,
                                  FloatRegister temp2) {
  MacroAssemblerX86Shared::minFloat64x2(lhs, rhs, temp1, temp2, dest);
}

// NaN-propagating maximum

void MacroAssembler::maxFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest, FloatRegister temp1,
                                  FloatRegister temp2) {
  MacroAssemblerX86Shared::maxFloat32x4(lhs, rhs, temp1, temp2, dest);
}

void MacroAssembler::maxFloat64x2(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest, FloatRegister temp1,
                                  FloatRegister temp2) {
  MacroAssemblerX86Shared::maxFloat64x2(lhs, rhs, temp1, temp2, dest);
}

// Compare-based minimum
//
// Note the two-argument forms intentionally take (rhs, lhsDest) with the
// result landing in the rhs register.

void MacroAssembler::pseudoMinFloat32x4(FloatRegister rhsOrRhsDest,
                                        FloatRegister lhsOrLhsDest) {
  // Shut up the linter by using the same names as in the declaration, then
  // aliasing here.
  FloatRegister rhsDest = rhsOrRhsDest;
  FloatRegister lhs = lhsOrLhsDest;
  vminps(Operand(lhs), rhsDest, rhsDest);
}

void MacroAssembler::pseudoMinFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                        FloatRegister dest) {
  vminps(Operand(rhs), lhs, dest);
}

void MacroAssembler::pseudoMinFloat64x2(FloatRegister rhsOrRhsDest,
                                        FloatRegister lhsOrLhsDest) {
  FloatRegister rhsDest = rhsOrRhsDest;
  FloatRegister lhs = lhsOrLhsDest;
  vminpd(Operand(lhs), rhsDest, rhsDest);
}

void MacroAssembler::pseudoMinFloat64x2(FloatRegister lhs, FloatRegister rhs,
                                        FloatRegister dest) {
  vminpd(Operand(rhs), lhs, dest);
}

// Compare-based maximum

void MacroAssembler::pseudoMaxFloat32x4(FloatRegister rhsOrRhsDest,
                                        FloatRegister lhsOrLhsDest) {
  FloatRegister rhsDest = rhsOrRhsDest;
  FloatRegister lhs = lhsOrLhsDest;
  vmaxps(Operand(lhs), rhsDest, rhsDest);
}

void MacroAssembler::pseudoMaxFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                        FloatRegister dest) {
  vmaxps(Operand(rhs), lhs, dest);
}

void MacroAssembler::pseudoMaxFloat64x2(FloatRegister rhsOrRhsDest,
                                        FloatRegister lhsOrLhsDest) {
  FloatRegister rhsDest = rhsOrRhsDest;
  FloatRegister lhs = lhsOrLhsDest;
  vmaxpd(Operand(lhs), rhsDest, rhsDest);
}

void
MacroAssembler::pseudoMaxFloat64x2(FloatRegister lhs, FloatRegister rhs,
                                   FloatRegister dest) {
  vmaxpd(Operand(rhs), lhs, dest);
}

// Widening/pairwise integer dot product

void MacroAssembler::widenDotInt16x8(FloatRegister lhs, FloatRegister rhs,
                                     FloatRegister dest) {
  vpmaddwd(Operand(rhs), lhs, dest);
}

void MacroAssembler::widenDotInt16x8(FloatRegister lhs, const SimdConstant& rhs,
                                     FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmaddwd,
                &MacroAssembler::vpmaddwdSimd128);
}

void MacroAssembler::dotInt8x16Int7x16(FloatRegister lhs, FloatRegister rhs,
                                       FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  // Without AVX, moving rhs into dest below would clobber an aliased lhs;
  // preserve lhs in the scratch register first.
  if (lhs == dest && !HasAVX()) {
    moveSimd128Int(lhs, scratch);
    lhs = scratch;
  }
  rhs = moveSimd128IntIfNotAVX(rhs, dest);
  vpmaddubsw(lhs, rhs, dest);
}

void MacroAssembler::dotInt8x16Int7x16ThenAdd(FloatRegister lhs,
                                              FloatRegister rhs,
                                              FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  rhs = moveSimd128IntIfNotAVX(rhs, scratch);
  vpmaddubsw(lhs, rhs, scratch);
  // Sum adjacent 16-bit products into 32-bit lanes (multiply by splat(1)),
  // then accumulate into dest.
  vpmaddwdSimd128(SimdConstant::SplatX8(1), scratch, scratch);
  vpaddd(Operand(scratch), dest, dest);
}

// Rounding

void MacroAssembler::ceilFloat32x4(FloatRegister src, FloatRegister dest) {
  vroundps(Assembler::SSERoundingMode::Ceil, Operand(src), dest);
}

void MacroAssembler::ceilFloat64x2(FloatRegister src, FloatRegister dest) {
  vroundpd(Assembler::SSERoundingMode::Ceil, Operand(src), dest);
}

void MacroAssembler::floorFloat32x4(FloatRegister src, FloatRegister dest) {
  vroundps(Assembler::SSERoundingMode::Floor, Operand(src), dest);
}

void MacroAssembler::floorFloat64x2(FloatRegister src, FloatRegister dest) {
  vroundpd(Assembler::SSERoundingMode::Floor, Operand(src), dest);
}

void MacroAssembler::truncFloat32x4(FloatRegister src, FloatRegister dest) {
  vroundps(Assembler::SSERoundingMode::Trunc, Operand(src), dest);
}

void MacroAssembler::truncFloat64x2(FloatRegister src, FloatRegister dest) {
  vroundpd(Assembler::SSERoundingMode::Trunc, Operand(src), dest);
}

void MacroAssembler::nearestFloat32x4(FloatRegister src, FloatRegister dest) {
  vroundps(Assembler::SSERoundingMode::Nearest, Operand(src), dest);
}

void MacroAssembler::nearestFloat64x2(FloatRegister src, FloatRegister dest) {
  vroundpd(Assembler::SSERoundingMode::Nearest, Operand(src), dest);
}

// Floating add

void MacroAssembler::addFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  vaddps(Operand(rhs), lhs, dest);
}

void MacroAssembler::addFloat32x4(FloatRegister lhs, const SimdConstant& rhs,
                                  FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vaddps,
                &MacroAssembler::vaddpsSimd128);
}

void MacroAssembler::addFloat64x2(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  vaddpd(Operand(rhs), lhs, dest);
}

void MacroAssembler::addFloat64x2(FloatRegister lhs, const SimdConstant& rhs,
                                  FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vaddpd,
                &MacroAssembler::vaddpdSimd128);
}

// Floating subtract

void MacroAssembler::subFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  vsubps(Operand(rhs), lhs, dest);
}

void MacroAssembler::subFloat32x4(FloatRegister lhs, const SimdConstant& rhs,
                                  FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vsubps,
                &MacroAssembler::vsubpsSimd128);
}

void MacroAssembler::subFloat64x2(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  AssemblerX86Shared::vsubpd(Operand(rhs), lhs, dest);
}

void MacroAssembler::subFloat64x2(FloatRegister lhs, const SimdConstant& rhs,
                                  FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vsubpd,
                &MacroAssembler::vsubpdSimd128);
}

// Floating division

void MacroAssembler::divFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  vdivps(Operand(rhs), lhs, dest);
}

void MacroAssembler::divFloat32x4(FloatRegister lhs, const SimdConstant& rhs,
                                  FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vdivps,
                &MacroAssembler::vdivpsSimd128);
}

void MacroAssembler::divFloat64x2(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  vdivpd(Operand(rhs), lhs, dest);
}

void MacroAssembler::divFloat64x2(FloatRegister lhs, const SimdConstant& rhs,
                                  FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vdivpd,
                &MacroAssembler::vdivpdSimd128);
}

// Floating Multiply

void MacroAssembler::mulFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  vmulps(Operand(rhs), lhs, dest);
}

void MacroAssembler::mulFloat32x4(FloatRegister lhs, const SimdConstant& rhs,
                                  FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vmulps,
                &MacroAssembler::vmulpsSimd128);
}

void MacroAssembler::mulFloat64x2(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  vmulpd(Operand(rhs), lhs, dest);
}

void MacroAssembler::mulFloat64x2(FloatRegister lhs, const SimdConstant& rhs,
                                  FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vmulpd,
                &MacroAssembler::vmulpdSimd128);
}

// Pairwise add

void MacroAssembler::extAddPairwiseInt8x16(FloatRegister src,
                                           FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  // dest is loaded with the splat-of-ones operand below; rescue src first if
  // the registers alias.
  if (dest == src) {
    moveSimd128(src, scratch);
    src = scratch;
  }
  // Multiply-add against a vector of ones sums adjacent lanes into 16 bits.
  loadConstantSimd128Int(SimdConstant::SplatX16(1), dest);
  vpmaddubsw(src, dest, dest);
}

void MacroAssembler::unsignedExtAddPairwiseInt8x16(FloatRegister src,
                                                   FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpmaddubswSimd128(SimdConstant::SplatX16(1), src, dest);
}

void MacroAssembler::extAddPairwiseInt16x8(FloatRegister src,
                                           FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpmaddwdSimd128(SimdConstant::SplatX8(1), src, dest);
}

void MacroAssembler::unsignedExtAddPairwiseInt16x8(FloatRegister src,
                                                   FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  // vpmaddwd is signed; bias each unsigned lane into signed range by XORing
  // 0x8000, sum pairs, then add back 2 * 0x8000 = 0x00010000 per 32-bit lane.
  vpxorSimd128(SimdConstant::SplatX8(-0x8000), src, dest);
  vpmaddwdSimd128(SimdConstant::SplatX8(1), dest, dest);
  vpadddSimd128(SimdConstant::SplatX4(0x00010000), dest, dest);
}

// Floating square root

void MacroAssembler::sqrtFloat32x4(FloatRegister src, FloatRegister dest) {
  vsqrtps(Operand(src), dest);
}

void MacroAssembler::sqrtFloat64x2(FloatRegister src, FloatRegister dest) {
  vsqrtpd(Operand(src), dest);
}

// Integer to floating point with rounding

void MacroAssembler::convertInt32x4ToFloat32x4(FloatRegister src,
                                               FloatRegister dest) {
  vcvtdq2ps(src, dest);
}

// There is no unsigned-source conversion instruction before AVX-512; use the
// shared helper's multi-instruction sequence.
void MacroAssembler::unsignedConvertInt32x4ToFloat32x4(FloatRegister src,
                                                       FloatRegister dest) {
  MacroAssemblerX86Shared::unsignedConvertInt32x4ToFloat32x4(src, dest);
}

void MacroAssembler::convertInt32x4ToFloat64x2(FloatRegister src,
                                               FloatRegister dest) {
  vcvtdq2pd(src, dest);
}

void MacroAssembler::unsignedConvertInt32x4ToFloat64x2(FloatRegister src,
                                                       FloatRegister dest) {
  MacroAssemblerX86Shared::unsignedConvertInt32x4ToFloat64x2(src, dest);
}

// Floating point to integer with saturation

void
MacroAssembler::truncSatFloat32x4ToInt32x4(FloatRegister src, 3187 FloatRegister dest) { 3188 MacroAssemblerX86Shared::truncSatFloat32x4ToInt32x4(src, dest); 3189 } 3190 3191 void MacroAssembler::unsignedTruncSatFloat32x4ToInt32x4(FloatRegister src, 3192 FloatRegister dest, 3193 FloatRegister temp) { 3194 MacroAssemblerX86Shared::unsignedTruncSatFloat32x4ToInt32x4(src, temp, dest); 3195 } 3196 3197 void MacroAssembler::truncSatFloat64x2ToInt32x4(FloatRegister src, 3198 FloatRegister dest, 3199 FloatRegister temp) { 3200 MacroAssemblerX86Shared::truncSatFloat64x2ToInt32x4(src, temp, dest); 3201 } 3202 3203 void MacroAssembler::unsignedTruncSatFloat64x2ToInt32x4(FloatRegister src, 3204 FloatRegister dest, 3205 FloatRegister temp) { 3206 MacroAssemblerX86Shared::unsignedTruncSatFloat64x2ToInt32x4(src, temp, dest); 3207 } 3208 3209 void MacroAssembler::truncFloat32x4ToInt32x4Relaxed(FloatRegister src, 3210 FloatRegister dest) { 3211 vcvttps2dq(src, dest); 3212 } 3213 3214 void MacroAssembler::unsignedTruncFloat32x4ToInt32x4Relaxed( 3215 FloatRegister src, FloatRegister dest) { 3216 MacroAssemblerX86Shared::unsignedTruncFloat32x4ToInt32x4Relaxed(src, dest); 3217 } 3218 3219 void MacroAssembler::truncFloat64x2ToInt32x4Relaxed(FloatRegister src, 3220 FloatRegister dest) { 3221 vcvttpd2dq(src, dest); 3222 } 3223 3224 void MacroAssembler::unsignedTruncFloat64x2ToInt32x4Relaxed( 3225 FloatRegister src, FloatRegister dest) { 3226 MacroAssemblerX86Shared::unsignedTruncFloat64x2ToInt32x4Relaxed(src, dest); 3227 } 3228 3229 // Floating point widening 3230 3231 void MacroAssembler::convertFloat64x2ToFloat32x4(FloatRegister src, 3232 FloatRegister dest) { 3233 vcvtpd2ps(src, dest); 3234 } 3235 3236 void MacroAssembler::convertFloat32x4ToFloat64x2(FloatRegister src, 3237 FloatRegister dest) { 3238 vcvtps2pd(src, dest); 3239 } 3240 3241 // Integer to integer narrowing 3242 3243 void MacroAssembler::narrowInt16x8(FloatRegister lhs, FloatRegister rhs, 3244 FloatRegister dest) { 3245 
vpacksswb(Operand(rhs), lhs, dest); 3246 } 3247 3248 void MacroAssembler::narrowInt16x8(FloatRegister lhs, const SimdConstant& rhs, 3249 FloatRegister dest) { 3250 binarySimd128(lhs, rhs, dest, &MacroAssembler::vpacksswb, 3251 &MacroAssembler::vpacksswbSimd128); 3252 } 3253 3254 void MacroAssembler::unsignedNarrowInt16x8(FloatRegister lhs, FloatRegister rhs, 3255 FloatRegister dest) { 3256 vpackuswb(Operand(rhs), lhs, dest); 3257 } 3258 3259 void MacroAssembler::unsignedNarrowInt16x8(FloatRegister lhs, 3260 const SimdConstant& rhs, 3261 FloatRegister dest) { 3262 binarySimd128(lhs, rhs, dest, &MacroAssembler::vpackuswb, 3263 &MacroAssembler::vpackuswbSimd128); 3264 } 3265 3266 void MacroAssembler::narrowInt32x4(FloatRegister lhs, FloatRegister rhs, 3267 FloatRegister dest) { 3268 vpackssdw(Operand(rhs), lhs, dest); 3269 } 3270 3271 void MacroAssembler::narrowInt32x4(FloatRegister lhs, const SimdConstant& rhs, 3272 FloatRegister dest) { 3273 binarySimd128(lhs, rhs, dest, &MacroAssembler::vpackssdw, 3274 &MacroAssembler::vpackssdwSimd128); 3275 } 3276 3277 void MacroAssembler::unsignedNarrowInt32x4(FloatRegister lhs, FloatRegister rhs, 3278 FloatRegister dest) { 3279 vpackusdw(Operand(rhs), lhs, dest); 3280 } 3281 3282 void MacroAssembler::unsignedNarrowInt32x4(FloatRegister lhs, 3283 const SimdConstant& rhs, 3284 FloatRegister dest) { 3285 binarySimd128(lhs, rhs, dest, &MacroAssembler::vpackusdw, 3286 &MacroAssembler::vpackusdwSimd128); 3287 } 3288 3289 // Integer to integer widening 3290 3291 void MacroAssembler::widenLowInt8x16(FloatRegister src, FloatRegister dest) { 3292 vpmovsxbw(Operand(src), dest); 3293 } 3294 3295 void MacroAssembler::widenHighInt8x16(FloatRegister src, FloatRegister dest) { 3296 vpalignr(Operand(src), dest, dest, 8); 3297 vpmovsxbw(Operand(dest), dest); 3298 } 3299 3300 void MacroAssembler::unsignedWidenLowInt8x16(FloatRegister src, 3301 FloatRegister dest) { 3302 vpmovzxbw(Operand(src), dest); 3303 } 3304 3305 void 
MacroAssembler::unsignedWidenHighInt8x16(FloatRegister src, 3306 FloatRegister dest) { 3307 vpalignr(Operand(src), dest, dest, 8); 3308 vpmovzxbw(Operand(dest), dest); 3309 } 3310 3311 void MacroAssembler::widenLowInt16x8(FloatRegister src, FloatRegister dest) { 3312 vpmovsxwd(Operand(src), dest); 3313 } 3314 3315 void MacroAssembler::widenHighInt16x8(FloatRegister src, FloatRegister dest) { 3316 vpalignr(Operand(src), dest, dest, 8); 3317 vpmovsxwd(Operand(dest), dest); 3318 } 3319 3320 void MacroAssembler::unsignedWidenLowInt16x8(FloatRegister src, 3321 FloatRegister dest) { 3322 vpmovzxwd(Operand(src), dest); 3323 } 3324 3325 void MacroAssembler::unsignedWidenHighInt16x8(FloatRegister src, 3326 FloatRegister dest) { 3327 vpalignr(Operand(src), dest, dest, 8); 3328 vpmovzxwd(Operand(dest), dest); 3329 } 3330 3331 void MacroAssembler::widenLowInt32x4(FloatRegister src, FloatRegister dest) { 3332 vpmovsxdq(Operand(src), dest); 3333 } 3334 3335 void MacroAssembler::unsignedWidenLowInt32x4(FloatRegister src, 3336 FloatRegister dest) { 3337 vpmovzxdq(Operand(src), dest); 3338 } 3339 3340 void MacroAssembler::widenHighInt32x4(FloatRegister src, FloatRegister dest) { 3341 if (src == dest || HasAVX()) { 3342 vmovhlps(src, src, dest); 3343 } else { 3344 vpshufd(ComputeShuffleMask(2, 3, 2, 3), src, dest); 3345 } 3346 vpmovsxdq(Operand(dest), dest); 3347 } 3348 3349 void MacroAssembler::unsignedWidenHighInt32x4(FloatRegister src, 3350 FloatRegister dest) { 3351 ScratchSimd128Scope scratch(*this); 3352 src = moveSimd128IntIfNotAVX(src, dest); 3353 vpxor(scratch, scratch, scratch); 3354 vpunpckhdq(scratch, src, dest); 3355 } 3356 3357 // Floating multiply-accumulate: srcDest [+-]= src1 * src2 3358 // The Intel FMA feature is some AVX* special sauce, no support yet. 
3359 3360 void MacroAssembler::fmaFloat32x4(FloatRegister src1, FloatRegister src2, 3361 FloatRegister srcDest) { 3362 if (HasFMA()) { 3363 vfmadd231ps(src2, src1, srcDest); 3364 return; 3365 } 3366 ScratchSimd128Scope scratch(*this); 3367 src1 = moveSimd128FloatIfNotAVX(src1, scratch); 3368 mulFloat32x4(src1, src2, scratch); 3369 addFloat32x4(srcDest, scratch, srcDest); 3370 } 3371 3372 void MacroAssembler::fnmaFloat32x4(FloatRegister src1, FloatRegister src2, 3373 FloatRegister srcDest) { 3374 if (HasFMA()) { 3375 vfnmadd231ps(src2, src1, srcDest); 3376 return; 3377 } 3378 ScratchSimd128Scope scratch(*this); 3379 src1 = moveSimd128FloatIfNotAVX(src1, scratch); 3380 mulFloat32x4(src1, src2, scratch); 3381 subFloat32x4(srcDest, scratch, srcDest); 3382 } 3383 3384 void MacroAssembler::fmaFloat64x2(FloatRegister src1, FloatRegister src2, 3385 FloatRegister srcDest) { 3386 if (HasFMA()) { 3387 vfmadd231pd(src2, src1, srcDest); 3388 return; 3389 } 3390 ScratchSimd128Scope scratch(*this); 3391 src1 = moveSimd128FloatIfNotAVX(src1, scratch); 3392 mulFloat64x2(src1, src2, scratch); 3393 addFloat64x2(srcDest, scratch, srcDest); 3394 } 3395 3396 void MacroAssembler::fnmaFloat64x2(FloatRegister src1, FloatRegister src2, 3397 FloatRegister srcDest) { 3398 if (HasFMA()) { 3399 vfnmadd231pd(src2, src1, srcDest); 3400 return; 3401 } 3402 ScratchSimd128Scope scratch(*this); 3403 src1 = moveSimd128FloatIfNotAVX(src1, scratch); 3404 mulFloat64x2(src1, src2, scratch); 3405 subFloat64x2(srcDest, scratch, srcDest); 3406 } 3407 3408 void MacroAssembler::minFloat32x4Relaxed(FloatRegister src, 3409 FloatRegister srcDest) { 3410 vminps(Operand(src), srcDest, srcDest); 3411 } 3412 3413 void MacroAssembler::minFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs, 3414 FloatRegister dest) { 3415 vminps(Operand(rhs), lhs, dest); 3416 } 3417 3418 void MacroAssembler::maxFloat32x4Relaxed(FloatRegister src, 3419 FloatRegister srcDest) { 3420 vmaxps(Operand(src), srcDest, srcDest); 3421 } 3422 
3423 void MacroAssembler::maxFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs, 3424 FloatRegister dest) { 3425 vmaxps(Operand(rhs), lhs, dest); 3426 } 3427 3428 void MacroAssembler::minFloat64x2Relaxed(FloatRegister src, 3429 FloatRegister srcDest) { 3430 vminpd(Operand(src), srcDest, srcDest); 3431 } 3432 3433 void MacroAssembler::minFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs, 3434 FloatRegister dest) { 3435 vminpd(Operand(rhs), lhs, dest); 3436 } 3437 3438 void MacroAssembler::maxFloat64x2Relaxed(FloatRegister src, 3439 FloatRegister srcDest) { 3440 vmaxpd(Operand(src), srcDest, srcDest); 3441 } 3442 3443 void MacroAssembler::maxFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs, 3444 FloatRegister dest) { 3445 vmaxpd(Operand(rhs), lhs, dest); 3446 } 3447 3448 // ======================================================================== 3449 // Truncate floating point. 3450 3451 void MacroAssembler::truncateFloat32ToInt64(Address src, Address dest, 3452 Register temp) { 3453 if (Assembler::HasSSE3()) { 3454 fld32(Operand(src)); 3455 fisttp(Operand(dest)); 3456 return; 3457 } 3458 3459 if (src.base == esp) { 3460 src.offset += 2 * sizeof(int32_t); 3461 } 3462 if (dest.base == esp) { 3463 dest.offset += 2 * sizeof(int32_t); 3464 } 3465 3466 reserveStack(2 * sizeof(int32_t)); 3467 3468 // Set conversion to truncation. 3469 fnstcw(Operand(esp, 0)); 3470 load32(Operand(esp, 0), temp); 3471 andl(Imm32(~0xFF00), temp); 3472 orl(Imm32(0xCFF), temp); 3473 store32(temp, Address(esp, sizeof(int32_t))); 3474 fldcw(Operand(esp, sizeof(int32_t))); 3475 3476 // Load double on fp stack, convert and load regular stack. 3477 fld32(Operand(src)); 3478 fistp(Operand(dest)); 3479 3480 // Reset the conversion flag. 
3481 fldcw(Operand(esp, 0)); 3482 3483 freeStack(2 * sizeof(int32_t)); 3484 } 3485 void MacroAssembler::truncateDoubleToInt64(Address src, Address dest, 3486 Register temp) { 3487 if (Assembler::HasSSE3()) { 3488 fld(Operand(src)); 3489 fisttp(Operand(dest)); 3490 return; 3491 } 3492 3493 if (src.base == esp) { 3494 src.offset += 2 * sizeof(int32_t); 3495 } 3496 if (dest.base == esp) { 3497 dest.offset += 2 * sizeof(int32_t); 3498 } 3499 3500 reserveStack(2 * sizeof(int32_t)); 3501 3502 // Set conversion to truncation. 3503 fnstcw(Operand(esp, 0)); 3504 load32(Operand(esp, 0), temp); 3505 andl(Imm32(~0xFF00), temp); 3506 orl(Imm32(0xCFF), temp); 3507 store32(temp, Address(esp, 1 * sizeof(int32_t))); 3508 fldcw(Operand(esp, 1 * sizeof(int32_t))); 3509 3510 // Load double on fp stack, convert and load regular stack. 3511 fld(Operand(src)); 3512 fistp(Operand(dest)); 3513 3514 // Reset the conversion flag. 3515 fldcw(Operand(esp, 0)); 3516 3517 freeStack(2 * sizeof(int32_t)); 3518 } 3519 3520 // =============================================================== 3521 // Clamping functions. 3522 3523 void MacroAssembler::clampIntToUint8(Register reg) { 3524 Label inRange; 3525 branchTest32(Assembler::Zero, reg, Imm32(0xffffff00), &inRange); 3526 { 3527 sarl(Imm32(31), reg); 3528 notl(reg); 3529 andl(Imm32(255), reg); 3530 } 3531 bind(&inRange); 3532 } 3533 3534 //}}} check_macroassembler_style 3535 // =============================================================== 3536 3537 } // namespace jit 3538 } // namespace js 3539 3540 #endif /* jit_x86_shared_MacroAssembler_x86_shared_inl_h */