Lowering-x86-shared.cpp (60454B)
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "jit/x86-shared/Lowering-x86-shared.h"

#include "mozilla/MathAlgorithms.h"

#include "jit/Lowering.h"
#include "jit/MIR-wasm.h"
#include "jit/MIR.h"
#include "wasm/WasmFeatures.h"  // for wasm::ReportSimdAnalysis

#include "jit/shared/Lowering-shared-inl.h"

using namespace js;
using namespace js::jit;

using mozilla::Abs;
using mozilla::FloorLog2;
using mozilla::Maybe;
using mozilla::Nothing;
using mozilla::Some;

// Create the LIR node for a table switch whose input is already an Int32.
// A temp is allocated for the index computation in the code generator.
LTableSwitch* LIRGeneratorX86Shared::newLTableSwitch(
    const LAllocation& in, const LDefinition& inputCopy) {
  return new (alloc()) LTableSwitch(in, inputCopy, temp());
}

// Create the LIR node for a table switch on a boxed Value: needs an extra
// integer temp and a double temp for unboxing/conversion.
LTableSwitchV* LIRGeneratorX86Shared::newLTableSwitchV(
    const LBoxAllocation& in) {
  return new (alloc()) LTableSwitchV(in, temp(), tempDouble(), temp());
}

// Use policy for a variable shift count (shl/shr/sar).
LUse LIRGeneratorX86Shared::useShiftRegister(MDefinition* mir) {
  // Unless BMI2 is available, the shift register must be ecx. x86 can't shift a
  // non-ecx register.
  if (Assembler::HasBMI2()) {
    return useRegister(mir);
  }
  return useFixed(mir, ecx);
}

// As useShiftRegister, but the use may alias the output (at-start).
LUse LIRGeneratorX86Shared::useShiftRegisterAtStart(MDefinition* mir) {
  // Unless BMI2 is available, the shift register must be ecx. x86 can't shift a
  // non-ecx register.
  if (Assembler::HasBMI2()) {
    return useRegisterAtStart(mir);
  }
  return useFixedAtStart(mir, ecx);
}

// Temp policy for a shift count that is produced internally.
LDefinition LIRGeneratorX86Shared::tempShift() {
  // Unless BMI2 is available, the shift register must be ecx. x86 can't shift a
  // non-ecx register.
  if (Assembler::HasBMI2()) {
    return temp();
  }
  return tempFixed(ecx);
}

// Lower a two-operand shift or rotate. The lhs is tied to the output
// (x86 two-address form) except in the BMI2 shift case, which is
// three-address (SHLX/SHRX/SARX).
void LIRGeneratorX86Shared::lowerForShift(LInstructionHelper<1, 2, 0>* ins,
                                          MDefinition* mir, MDefinition* lhs,
                                          MDefinition* rhs) {
  ins->setOperand(0, useRegisterAtStart(lhs));

  if (rhs->isConstant()) {
    ins->setOperand(1, useOrConstantAtStart(rhs));
    defineReuseInput(ins, mir, 0);
  } else if (!mir->isRotate()) {
    if (Assembler::HasBMI2()) {
      ins->setOperand(1, useRegisterAtStart(rhs));
      define(ins, mir);
    } else {
      // Non-BMI2 variable shift: the count must live in ecx. If lhs and
      // rhs may be the same LIR node, the rhs use must be at-start.
      ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
                             ? useShiftRegister(rhs)
                             : useShiftRegisterAtStart(rhs));
      defineReuseInput(ins, mir, 0);
    }
  } else {
    // Rotates always take their count in ecx (no BMI2 form).
    ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
                           ? useFixed(rhs, ecx)
                           : useFixedAtStart(rhs, ecx));
    defineReuseInput(ins, mir, 0);
  }
}

// Lower a unary ALU op; x86 two-address form ties input to output.
void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 1, 0>* ins,
                                        MDefinition* mir, MDefinition* input) {
  ins->setOperand(0, useRegisterAtStart(input));
  defineReuseInput(ins, mir, 0);
}

// Lower a binary ALU op; x86 two-address form ties lhs to output, with a
// special three-address case for infallible add-with-constant (LEA).
void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins,
                                        MDefinition* mir, MDefinition* lhs,
                                        MDefinition* rhs) {
  if (MOZ_UNLIKELY(mir->isAdd() && mir->type() == MIRType::Int32 &&
                   rhs->isConstant() && !mir->toAdd()->fallible())) {
    // Special case instruction that is widely used in Wasm during address
    // calculation. And x86 platform has LEA instruction for it.
    // See CodeGenerator::visitAddI for codegen.
    ins->setOperand(0, useRegisterAtStart(lhs));
    ins->setOperand(1, useOrConstantAtStart(rhs));
    define(ins, mir);
    return;
  }

  ins->setOperand(0, useRegisterAtStart(lhs));
  ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
                         ? useOrConstant(rhs)
                         : useOrConstantAtStart(rhs));
  defineReuseInput(ins, mir, 0);
}

// Lower a unary FP op.
void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 1, 0>* ins,
                                        MDefinition* mir, MDefinition* input) {
  // Without AVX, we'll need to use the x86 encodings where the input must be
  // the same location as the output.
  if (!Assembler::HasAVX()) {
    ins->setOperand(0, useRegisterAtStart(input));
    defineReuseInput(ins, mir, 0);
  } else {
    ins->setOperand(0, useRegisterAtStart(input));
    define(ins, mir);
  }
}

// Lower a binary FP op.
void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 0>* ins,
                                        MDefinition* mir, MDefinition* lhs,
                                        MDefinition* rhs) {
  // Without AVX, we'll need to use the x86 encodings where one of the
  // inputs must be the same location as the output.
  if (!Assembler::HasAVX()) {
    ins->setOperand(0, useRegisterAtStart(lhs));
    ins->setOperand(
        1, willHaveDifferentLIRNodes(lhs, rhs) ? use(rhs) : useAtStart(rhs));
    defineReuseInput(ins, mir, 0);
  } else {
    ins->setOperand(0, useRegisterAtStart(lhs));
    ins->setOperand(1, useAtStart(rhs));
    define(ins, mir);
  }
}

// Lower integer multiplication. A snapshot is attached when the MIR node is
// fallible (overflow and/or negative-zero bailouts).
void LIRGeneratorX86Shared::lowerMulI(MMul* mul, MDefinition* lhs,
                                      MDefinition* rhs) {
  if (rhs->isConstant()) {
    auto* lir = new (alloc()) LMulI(useRegisterAtStart(lhs),
                                    useOrConstantAtStart(rhs), LAllocation());
    if (mul->fallible()) {
      assignSnapshot(lir, mul->bailoutKind());
    }
    define(lir, mul);
    return;
  }

  // Note: If we need a negative zero check, lhs is used twice.
  LAllocation lhsCopy = mul->canBeNegativeZero() ? use(lhs) : LAllocation();
  LMulI* lir = new (alloc())
      LMulI(useRegisterAtStart(lhs),
            willHaveDifferentLIRNodes(lhs, rhs) ? use(rhs) : useAtStart(rhs),
            lhsCopy);
  if (mul->fallible()) {
    assignSnapshot(lir, mul->bailoutKind());
  }
  defineReuseInput(lir, mul, 0);
}

// Lower signed integer division. Constant divisors avoid IDIV: powers of two
// become shifts, other constants use reciprocal multiplication. The general
// case uses IDIV, which implicitly reads edx:eax and writes eax (quotient)
// and edx (remainder).
void LIRGeneratorX86Shared::lowerDivI(MDiv* div) {
  // Division instructions are slow. Division by constant denominators can be
  // rewritten to use other instructions.
  if (div->rhs()->isConstant()) {
    int32_t rhs = div->rhs()->toConstant()->toInt32();

    // Division by powers of two can be done by shifting, and division by
    // other numbers can be done by a reciprocal multiplication technique.
    int32_t shift = FloorLog2(Abs(rhs));
    if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
      LAllocation lhs = useRegisterAtStart(div->lhs());

      // When truncated with maybe a non-zero remainder, we have to round the
      // result toward 0. This requires an extra register to round up/down
      // whether the left-hand-side is signed.
      //
      // If the numerator might be signed, and needs adjusting, then an extra
      // lhs copy is needed to round the result of the integer division towards
      // zero.
      //
      // Otherwise the numerator is unsigned, so does not need adjusting.
      bool needRoundNeg = div->canBeNegativeDividend() && div->isTruncated();
      LAllocation lhsCopy =
          needRoundNeg ? useRegister(div->lhs()) : LAllocation();

      auto* lir = new (alloc()) LDivPowTwoI(lhs, lhsCopy, shift, rhs < 0);
      if (div->fallible()) {
        assignSnapshot(lir, div->bailoutKind());
      }
      defineReuseInput(lir, div, 0);
      return;
    }

#ifdef JS_CODEGEN_X86
    // On x86-32 the reciprocal-multiply sequence needs eax as a temp and
    // produces the result in edx.
    auto* lir = new (alloc())
        LDivConstantI(useRegister(div->lhs()), tempFixed(eax), rhs);
    if (div->fallible()) {
      assignSnapshot(lir, div->bailoutKind());
    }
    defineFixed(lir, div, LAllocation(AnyRegister(edx)));
#else
    auto* lir =
        new (alloc()) LDivConstantI(useRegister(div->lhs()), temp(), rhs);
    if (div->fallible()) {
      assignSnapshot(lir, div->bailoutKind());
    }
    define(lir, div);
#endif
    return;
  }

  // General case: IDIV. Dividend in eax, edx clobbered, quotient in eax.
  auto* lir = new (alloc()) LDivI(useFixedAtStart(div->lhs(), eax),
                                  useRegister(div->rhs()), tempFixed(edx));
  if (div->fallible()) {
    assignSnapshot(lir, div->bailoutKind());
  }
  defineFixed(lir, div, LAllocation(AnyRegister(eax)));
}

// Lower signed integer modulus. Mirrors lowerDivI, but in the general IDIV
// case eax is the clobbered temp and the remainder is produced in edx.
void LIRGeneratorX86Shared::lowerModI(MMod* mod) {
  if (mod->rhs()->isConstant()) {
    int32_t rhs = mod->rhs()->toConstant()->toInt32();
    int32_t shift = FloorLog2(Abs(rhs));
    if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
      auto* lir =
          new (alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
      if (mod->fallible()) {
        assignSnapshot(lir, mod->bailoutKind());
      }
      defineReuseInput(lir, mod, 0);
      return;
    }

#ifdef JS_CODEGEN_X86
    // On x86-32 the constant-modulus sequence needs edx as a temp and
    // produces the result in eax.
    auto* lir = new (alloc())
        LModConstantI(useRegister(mod->lhs()), tempFixed(edx), rhs);
    if (mod->fallible()) {
      assignSnapshot(lir, mod->bailoutKind());
    }
    defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
#else
    auto* lir =
        new (alloc()) LModConstantI(useRegister(mod->lhs()), temp(), rhs);
    if (mod->fallible()) {
      assignSnapshot(lir, mod->bailoutKind());
    }
    define(lir, mod);
#endif
    return;
  }

  // General case: IDIV. Dividend in eax (clobbered), remainder in edx.
  auto* lir = new (alloc()) LModI(useFixedAtStart(mod->lhs(), eax),
                                  useRegister(mod->rhs()), tempFixed(eax));
  if (mod->fallible()) {
    assignSnapshot(lir, mod->bailoutKind());
  }
  defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
}

// Lower a wasm select on a 32-bit (or smaller) value; the true arm is tied
// to the output so the false arm can be conditionally moved over it.
void LIRGeneratorX86Shared::lowerWasmSelectI(MWasmSelect* select) {
  auto* lir = new (alloc())
      LWasmSelect(useRegisterAtStart(select->trueExpr()),
                  useAny(select->falseExpr()), useRegister(select->condExpr()));
  defineReuseInput(lir, select, LWasmSelect::TrueExprIndex);
}

// As lowerWasmSelectI, for 64-bit values.
void LIRGeneratorX86Shared::lowerWasmSelectI64(MWasmSelect* select) {
  auto* lir = new (alloc()) LWasmSelectI64(
      useInt64RegisterAtStart(select->trueExpr()),
      useInt64(select->falseExpr()), useRegister(select->condExpr()));
  defineInt64ReuseInput(lir, select, LWasmSelectI64::TrueExprIndex);
}

// Lower an asm.js heap load. The bounds-check limit and memory base are only
// materialized when present on the MIR node.
void LIRGenerator::visitAsmJSLoadHeap(MAsmJSLoadHeap* ins) {
  MDefinition* base = ins->base();
  MOZ_ASSERT(base->type() == MIRType::Int32);

  MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
  MOZ_ASSERT_IF(ins->needsBoundsCheck(),
                boundsCheckLimit->type() == MIRType::Int32);

  // For simplicity, require a register if we're going to emit a bounds-check
  // branch, so that we don't have special cases for constants. This should
  // only happen in rare constant-folding cases since asm.js sets the minimum
  // heap size based when accessed via constant.
  LAllocation baseAlloc = ins->needsBoundsCheck()
                              ? useRegisterAtStart(base)
                              : useRegisterOrZeroAtStart(base);

  LAllocation limitAlloc = ins->needsBoundsCheck()
                               ? useRegisterAtStart(boundsCheckLimit)
                               : LAllocation();
  LAllocation memoryBaseAlloc = ins->hasMemoryBase()
                                    ? useRegisterAtStart(ins->memoryBase())
                                    : LAllocation();

  auto* lir =
      new (alloc()) LAsmJSLoadHeap(baseAlloc, limitAlloc, memoryBaseAlloc);
  define(lir, ins);
}

// Lower an asm.js heap store. 8-bit stores on x86-32 need a byte-addressable
// value register (see below); other sizes take any register.
void LIRGenerator::visitAsmJSStoreHeap(MAsmJSStoreHeap* ins) {
  MDefinition* base = ins->base();
  MOZ_ASSERT(base->type() == MIRType::Int32);

  MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
  MOZ_ASSERT_IF(ins->needsBoundsCheck(),
                boundsCheckLimit->type() == MIRType::Int32);

  // For simplicity, require a register if we're going to emit a bounds-check
  // branch, so that we don't have special cases for constants. This should
  // only happen in rare constant-folding cases since asm.js sets the minimum
  // heap size based when accessed via constant.
  LAllocation baseAlloc = ins->needsBoundsCheck()
                              ? useRegisterAtStart(base)
                              : useRegisterOrZeroAtStart(base);

  LAllocation limitAlloc = ins->needsBoundsCheck()
                               ? useRegisterAtStart(boundsCheckLimit)
                               : LAllocation();
  LAllocation memoryBaseAlloc = ins->hasMemoryBase()
                                    ? useRegisterAtStart(ins->memoryBase())
                                    : LAllocation();

  LAsmJSStoreHeap* lir = nullptr;
  switch (ins->access().type()) {
    case Scalar::Int8:
    case Scalar::Uint8:
#ifdef JS_CODEGEN_X86
      // See comment for LIRGeneratorX86::useByteOpRegister.
      lir = new (alloc()) LAsmJSStoreHeap(
          baseAlloc, useFixed(ins->value(), eax), limitAlloc, memoryBaseAlloc);
      break;
#endif
    case Scalar::Int16:
    case Scalar::Uint16:
    case Scalar::Int32:
    case Scalar::Uint32:
    case Scalar::Float32:
    case Scalar::Float64:
      // For now, don't allow constant values. The immediate operand affects
      // instruction layout which affects patching.
      lir = new (alloc())
          LAsmJSStoreHeap(baseAlloc, useRegisterAtStart(ins->value()),
                          limitAlloc, memoryBaseAlloc);
      break;
    case Scalar::Int64:
    case Scalar::Simd128:
      MOZ_CRASH("NYI");
    case Scalar::Uint8Clamped:
    case Scalar::BigInt64:
    case Scalar::BigUint64:
    case Scalar::Float16:
    case Scalar::MaxTypedArrayViewType:
      MOZ_CRASH("unexpected array type");
  }
  add(lir, ins);
}

// Lower unsigned integer division; same structure as lowerDivI but without
// sign adjustments, using DIV in the general case.
void LIRGeneratorX86Shared::lowerUDiv(MDiv* div) {
  if (div->rhs()->isConstant()) {
    // NOTE: the result of toInt32 is coerced to uint32_t.
    uint32_t rhs = div->rhs()->toConstant()->toInt32();
    int32_t shift = FloorLog2(rhs);

    if (rhs != 0 && uint32_t(1) << shift == rhs) {
      auto* lir = new (alloc()) LDivPowTwoI(useRegisterAtStart(div->lhs()),
                                            LAllocation(), shift, false);
      if (div->fallible()) {
        assignSnapshot(lir, div->bailoutKind());
      }
      defineReuseInput(lir, div, 0);
    } else {
#ifdef JS_CODEGEN_X86
      auto* lir = new (alloc())
          LUDivConstant(useRegister(div->lhs()), tempFixed(eax), rhs);
      if (div->fallible()) {
        assignSnapshot(lir, div->bailoutKind());
      }
      defineFixed(lir, div, LAllocation(AnyRegister(edx)));
#else
      auto* lir =
          new (alloc()) LUDivConstant(useRegister(div->lhs()), temp(), rhs);
      if (div->fallible()) {
        assignSnapshot(lir, div->bailoutKind());
      }
      define(lir, div);
#endif
    }
    return;
  }

  // General case: DIV. Dividend in eax, edx clobbered, quotient in eax.
  auto* lir = new (alloc()) LUDiv(useFixedAtStart(div->lhs(), eax),
                                  useRegister(div->rhs()), tempFixed(edx));
  if (div->fallible()) {
    assignSnapshot(lir, div->bailoutKind());
  }
  defineFixed(lir, div, LAllocation(AnyRegister(eax)));
}

// Lower unsigned integer modulus; mirrors lowerUDiv, with the remainder
// produced in edx and eax clobbered in the general case.
void LIRGeneratorX86Shared::lowerUMod(MMod* mod) {
  if (mod->rhs()->isConstant()) {
    uint32_t rhs = mod->rhs()->toConstant()->toInt32();
    int32_t shift = FloorLog2(rhs);

    if (rhs != 0 && uint32_t(1) << shift == rhs) {
      auto* lir =
          new (alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
      if (mod->fallible()) {
        assignSnapshot(lir, mod->bailoutKind());
      }
      defineReuseInput(lir, mod, 0);
    } else {
#ifdef JS_CODEGEN_X86
      auto* lir = new (alloc())
          LUModConstant(useRegister(mod->lhs()), tempFixed(edx), rhs);
      if (mod->fallible()) {
        assignSnapshot(lir, mod->bailoutKind());
      }
      defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
#else
      auto* lir =
          new (alloc()) LUModConstant(useRegister(mod->lhs()), temp(), rhs);
      if (mod->fallible()) {
        assignSnapshot(lir, mod->bailoutKind());
      }
      define(lir, mod);
#endif
    }
    return;
  }

  auto* lir = new (alloc()) LUMod(useFixedAtStart(mod->lhs(), eax),
                                  useRegister(mod->rhs()), tempFixed(eax));
  if (mod->fallible()) {
    assignSnapshot(lir, mod->bailoutKind());
  }
  defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
}

// Lower an unsigned right shift that produces a Double. A copy of lhs is
// needed as a temp except in the BMI2 register-shift case.
void LIRGeneratorX86Shared::lowerUrshD(MUrsh* mir) {
  MDefinition* lhs = mir->lhs();
  MDefinition* rhs = mir->rhs();

  MOZ_ASSERT(lhs->type() == MIRType::Int32);
  MOZ_ASSERT(rhs->type() == MIRType::Int32);
  MOZ_ASSERT(mir->type() == MIRType::Double);

  LUse lhsUse = useRegisterAtStart(lhs);
  LAllocation rhsAlloc;
  LDefinition tempDef;
  if (rhs->isConstant()) {
    rhsAlloc = useOrConstant(rhs);
    tempDef = tempCopy(lhs, 0);
  } else if (Assembler::HasBMI2()) {
    rhsAlloc = useRegisterAtStart(rhs);
    tempDef = temp();
  } else {
    rhsAlloc = useShiftRegister(rhs);
    tempDef = tempCopy(lhs, 0);
  }

  auto* lir = new (alloc()) LUrshD(lhsUse, rhsAlloc, tempDef);
  define(lir, mir);
}

// Lower pow with a constant power-of-two base: implemented as a shift by the
// exponent, so the exponent needs the shift-register policy. Fallible
// (bails out on out-of-range exponents), hence the snapshot.
void LIRGeneratorX86Shared::lowerPowOfTwoI(MPow* mir) {
  int32_t base = mir->input()->toConstant()->toInt32();
  MDefinition* power = mir->power();

  auto* lir = new (alloc()) LPowOfTwoI(useShiftRegister(power), base);
  assignSnapshot(lir, mir->bailoutKind());
  define(lir, mir);
}

// Lower a BigInt pointer-sized left shift; needs a general temp plus a
// shift-count temp (ecx unless BMI2, see tempShift). Fallible.
void LIRGeneratorX86Shared::lowerBigIntPtrLsh(MBigIntPtrLsh* ins) {
  auto* lir = new (alloc()) LBigIntPtrLsh(
      useRegister(ins->lhs()), useRegister(ins->rhs()), temp(), tempShift());
  assignSnapshot(lir, ins->bailoutKind());
  define(lir, ins);
}

// Lower a BigInt pointer-sized right shift; same temp requirements as Lsh.
void LIRGeneratorX86Shared::lowerBigIntPtrRsh(MBigIntPtrRsh* ins) {
  auto* lir = new (alloc()) LBigIntPtrRsh(
      useRegister(ins->lhs()), useRegister(ins->rhs()), temp(), tempShift());
  assignSnapshot(lir, ins->bailoutKind());
  define(lir, ins);
}

// Lower a typed-array compareExchange. The underlying CMPXCHG requires eax
// for the old value/result; on x86-32 byte arrays additionally need a
// byte-addressable register for newval (useI386ByteRegisters).
void LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement(
    MCompareExchangeTypedArrayElement* ins, bool useI386ByteRegisters) {
  MOZ_ASSERT(!Scalar::isFloatingType(ins->arrayType()));
  MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
  MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);

  const LUse elements = useRegister(ins->elements());
  const LAllocation index =
      useRegisterOrIndexConstant(ins->index(), ins->arrayType());

  // If the target is a floating register then we need a temp at the
  // lower level; that temp must be eax.
  //
  // Otherwise the target (if used) is an integer register, which
  // must be eax. If the target is not used the machine code will
  // still clobber eax, so just pretend it's used.
  //
  // oldval must be in a register.
  //
  // newval must be in a register. If the source is a byte array
  // then newval must be a register that has a byte size: on x86
  // this must be ebx, ecx, or edx (eax is taken for the output).
  //
  // Bug #1077036 describes some further optimization opportunities.

  bool fixedOutput = false;
  LDefinition tempDef = LDefinition::BogusTemp();
  LAllocation newval;
  if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
    // Uint32 result consumed as Double: eax is a temp, output is a float reg.
    tempDef = tempFixed(eax);
    newval = useRegister(ins->newval());
  } else {
    fixedOutput = true;
    if (useI386ByteRegisters && ins->isByteArray()) {
      newval = useFixed(ins->newval(), ebx);
    } else {
      newval = useRegister(ins->newval());
    }
  }

  const LAllocation oldval = useRegister(ins->oldval());

  LCompareExchangeTypedArrayElement* lir =
      new (alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval,
                                                      newval, tempDef);

  if (fixedOutput) {
    defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
  } else {
    define(lir, ins);
  }
}

// Lower a typed-array atomic exchange (XCHG-based).
void LIRGeneratorX86Shared::lowerAtomicExchangeTypedArrayElement(
    MAtomicExchangeTypedArrayElement* ins, bool useI386ByteRegisters) {
  MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32);

  MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
  MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);

  const LUse elements = useRegister(ins->elements());
  const LAllocation index =
      useRegisterOrIndexConstant(ins->index(), ins->arrayType());
  const LAllocation value = useRegister(ins->value());

  // The underlying instruction is XCHG, which can operate on any
  // register.
  //
  // If the target is a floating register (for Uint32) then we need
  // a temp into which to exchange.
  //
  // If the source is a byte array then we need a register that has
  // a byte size; in this case -- on x86 only -- pin the output to
  // an appropriate register and use that as a temp in the back-end.

  LDefinition tempDef = LDefinition::BogusTemp();
  if (ins->arrayType() == Scalar::Uint32) {
    MOZ_ASSERT(ins->type() == MIRType::Double);
    tempDef = temp();
  }

  LAtomicExchangeTypedArrayElement* lir = new (alloc())
      LAtomicExchangeTypedArrayElement(elements, index, value, tempDef);

  if (useI386ByteRegisters && ins->isByteArray()) {
    defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
  } else {
    define(lir, ins);
  }
}

// Lower a typed-array atomic read-modify-write (add/sub/and/or/xor).
// Register requirements depend on whether the result is used, whether the
// op is add/sub (XADD) or a bitop (CMPXCHG loop), and on x86-32 byte-array
// constraints; see the case analysis below.
void LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(
    MAtomicTypedArrayElementBinop* ins, bool useI386ByteRegisters) {
  MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
  MOZ_ASSERT(!Scalar::isFloatingType(ins->arrayType()));
  MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
  MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);

  const LUse elements = useRegister(ins->elements());
  const LAllocation index =
      useRegisterOrIndexConstant(ins->index(), ins->arrayType());

  // Case 1: the result of the operation is not used.
  //
  // We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND,
  // LOCK OR, or LOCK XOR. We can do this even for the Uint32 case.

  if (ins->isForEffect()) {
    LAllocation value;
    if (useI386ByteRegisters && ins->isByteArray() &&
        !ins->value()->isConstant()) {
      value = useFixed(ins->value(), ebx);
    } else {
      value = useRegisterOrConstant(ins->value());
    }

    LAtomicTypedArrayElementBinopForEffect* lir = new (alloc())
        LAtomicTypedArrayElementBinopForEffect(elements, index, value);

    add(lir, ins);
    return;
  }

  // Case 2: the result of the operation is used.
  //
  // For ADD and SUB we'll use XADD:
  //
  //    movl       src, output
  //    lock xaddl output, mem
  //
  // For the 8-bit variants XADD needs a byte register for the output.
  //
  // For AND/OR/XOR we need to use a CMPXCHG loop:
  //
  //    movl          *mem, eax
  // L: mov           eax, temp
  //    andl          src, temp
  //    lock cmpxchg  temp, mem  ; reads eax also
  //    jnz           L
  //    ; result in eax
  //
  // Note the placement of L, cmpxchg will update eax with *mem if
  // *mem does not have the expected value, so reloading it at the
  // top of the loop would be redundant.
  //
  // If the array is not a uint32 array then:
  //  - eax should be the output (one result of the cmpxchg)
  //  - there is a temp, which must have a byte register if
  //    the array has 1-byte elements
  //
  // If the array is a uint32 array then:
  //  - eax is the first temp
  //  - we also need a second temp
  //
  // There are optimization opportunities:
  //  - better register allocation in the x86 8-bit case, Bug #1077036.

  bool bitOp =
      !(ins->operation() == AtomicOp::Add || ins->operation() == AtomicOp::Sub);
  bool fixedOutput = true;
  bool reuseInput = false;
  LDefinition tempDef1 = LDefinition::BogusTemp();
  LDefinition tempDef2 = LDefinition::BogusTemp();
  LAllocation value;

  if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
    value = useRegisterOrConstant(ins->value());
    fixedOutput = false;
    if (bitOp) {
      tempDef1 = tempFixed(eax);
      tempDef2 = temp();
    } else {
      tempDef1 = temp();
    }
  } else if (useI386ByteRegisters && ins->isByteArray()) {
    if (ins->value()->isConstant()) {
      value = useRegisterOrConstant(ins->value());
    } else {
      value = useFixed(ins->value(), ebx);
    }
    if (bitOp) {
      tempDef1 = tempFixed(ecx);
    }
  } else if (bitOp) {
    value = useRegisterOrConstant(ins->value());
    tempDef1 = temp();
  } else if (ins->value()->isConstant()) {
    fixedOutput = false;
    value = useRegisterOrConstant(ins->value());
  } else {
    fixedOutput = false;
    reuseInput = true;
    value = useRegisterAtStart(ins->value());
  }

  LAtomicTypedArrayElementBinop* lir = new (alloc())
      LAtomicTypedArrayElementBinop(elements, index, value, tempDef1, tempDef2);

  if (fixedOutput) {
    defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
  } else if (reuseInput) {
    defineReuseInput(lir, ins, LAtomicTypedArrayElementBinop::ValueIndex);
  } else {
    define(lir, ins);
  }
}

// Lower copysign for Double/Float32.
void LIRGenerator::visitCopySign(MCopySign* ins) {
  MDefinition* lhs = ins->lhs();
  MDefinition* rhs = ins->rhs();

  MOZ_ASSERT(IsFloatingPointType(lhs->type()));
  MOZ_ASSERT(lhs->type() == rhs->type());
  MOZ_ASSERT(lhs->type() == ins->type());

  LInstructionHelper<1, 2, 0>* lir;
  if (lhs->type() == MIRType::Double) {
    lir = new (alloc()) LCopySignD();
  } else {
    lir = new (alloc()) LCopySignF();
  }

  // As lowerForFPU, but we want rhs to be in a FP register too.
  lir->setOperand(0, useRegisterAtStart(lhs));
  if (!Assembler::HasAVX()) {
    lir->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
                           ? useRegister(rhs)
                           : useRegisterAtStart(rhs));
    defineReuseInput(lir, ins, 0);
  } else {
    lir->setOperand(1, useRegisterAtStart(rhs));
    define(lir, ins);
  }
}

// These lowerings are really x86-shared but some Masm APIs are not yet
// available on x86.

// Ternary and binary operators require the dest register to be the same as
// their first input register, leading to a pattern of useRegisterAtStart +
// defineReuseInput.

// Lower a wasm three-operand SIMD op. Each case picks operand/temp policies
// tuned to the x86/x64 code-generation sequence for that op.
void LIRGenerator::visitWasmTernarySimd128(MWasmTernarySimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  MOZ_ASSERT(ins->v0()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->v1()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->v2()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  switch (ins->simdOp()) {
    case wasm::SimdOp::V128Bitselect: {
      // Enforcing lhs == output avoids one setup move. We would like to also
      // enforce merging the control with the temp (with
      // useRegisterAtStart(control) and tempCopy()), but the register
      // allocator ignores those constraints at present.
      auto* lir = new (alloc()) LWasmTernarySimd128(
          useRegisterAtStart(ins->v0()), useRegister(ins->v1()),
          useRegister(ins->v2()), tempSimd128(), ins->simdOp());
      defineReuseInput(lir, ins, LWasmTernarySimd128::V0Index);
      break;
    }
    case wasm::SimdOp::F32x4RelaxedMadd:
    case wasm::SimdOp::F32x4RelaxedNmadd:
    case wasm::SimdOp::F64x2RelaxedMadd:
    case wasm::SimdOp::F64x2RelaxedNmadd: {
      // Fused multiply-add variants accumulate into v2, so v2 is tied to
      // the output.
      auto* lir = new (alloc())
          LWasmTernarySimd128(useRegister(ins->v0()), useRegister(ins->v1()),
                              useRegisterAtStart(ins->v2()),
                              LDefinition::BogusTemp(), ins->simdOp());
      defineReuseInput(lir, ins, LWasmTernarySimd128::V2Index);
      break;
    }
    case wasm::SimdOp::I32x4RelaxedDotI8x16I7x16AddS: {
      // The dot-product-add also accumulates into v2.
      auto* lir = new (alloc())
          LWasmTernarySimd128(useRegister(ins->v0()), useRegister(ins->v1()),
                              useRegisterAtStart(ins->v2()),
                              LDefinition::BogusTemp(), ins->simdOp());
      defineReuseInput(lir, ins, LWasmTernarySimd128::V2Index);
      break;
    }
    case wasm::SimdOp::I8x16RelaxedLaneSelect:
    case wasm::SimdOp::I16x8RelaxedLaneSelect:
    case wasm::SimdOp::I32x4RelaxedLaneSelect:
    case wasm::SimdOp::I64x2RelaxedLaneSelect: {
      if (Assembler::HasAVX()) {
        // AVX blend is three-address: no input needs to alias the output.
        auto* lir = new (alloc()) LWasmTernarySimd128(
            useRegisterAtStart(ins->v0()), useRegisterAtStart(ins->v1()),
            useRegisterAtStart(ins->v2()), LDefinition::BogusTemp(),
            ins->simdOp());
        define(lir, ins);
      } else {
        // Non-AVX blend reads its mask from a fixed vector register (vmm0)
        // and overwrites v1.
        auto* lir = new (alloc()) LWasmTernarySimd128(
            useRegister(ins->v0()), useRegisterAtStart(ins->v1()),
            useFixed(ins->v2(), vmm0), LDefinition::BogusTemp(), ins->simdOp());
        defineReuseInput(lir, ins, LWasmTernarySimd128::V1Index);
      }
      break;
    }
    default:
      MOZ_CRASH("NYI");
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

// Lower a wasm two-operand SIMD op. First commutative operands may be
// reordered, then some ops swap lhs/rhs (and flip the comparison) to match
// the x86 instruction forms, then temps are allocated per op, and finally
// the three-address (AVX) or two-address (reuse-input) shape is chosen.
void LIRGenerator::visitWasmBinarySimd128(MWasmBinarySimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  MDefinition* lhs = ins->lhs();
  MDefinition* rhs = ins->rhs();
  wasm::SimdOp op = ins->simdOp();

  MOZ_ASSERT(lhs->type() == MIRType::Simd128);
  MOZ_ASSERT(rhs->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  // Note MWasmBinarySimd128::foldsTo has already specialized operations that
  // have a constant operand, so this takes care of more general cases of
  // reordering, see ReorderCommutative.
  if (ins->isCommutative()) {
    ReorderCommutative(&lhs, &rhs, ins);
  }

  // Swap operands and change operation if necessary, these are all x86/x64
  // dependent transformations. Except where noted, this is about avoiding
  // unnecessary moves and fixups in the code generator macros.
  bool swap = false;
  switch (op) {
    case wasm::SimdOp::V128AndNot: {
      // Code generation requires the operands to be reversed.
      swap = true;
      break;
    }
    case wasm::SimdOp::I8x16LtS: {
      swap = true;
      op = wasm::SimdOp::I8x16GtS;
      break;
    }
    case wasm::SimdOp::I8x16GeS: {
      swap = true;
      op = wasm::SimdOp::I8x16LeS;
      break;
    }
    case wasm::SimdOp::I16x8LtS: {
      swap = true;
      op = wasm::SimdOp::I16x8GtS;
      break;
    }
    case wasm::SimdOp::I16x8GeS: {
      swap = true;
      op = wasm::SimdOp::I16x8LeS;
      break;
    }
    case wasm::SimdOp::I32x4LtS: {
      swap = true;
      op = wasm::SimdOp::I32x4GtS;
      break;
    }
    case wasm::SimdOp::I32x4GeS: {
      swap = true;
      op = wasm::SimdOp::I32x4LeS;
      break;
    }
    case wasm::SimdOp::F32x4Gt: {
      swap = true;
      op = wasm::SimdOp::F32x4Lt;
      break;
    }
    case wasm::SimdOp::F32x4Ge: {
      swap = true;
      op = wasm::SimdOp::F32x4Le;
      break;
    }
    case wasm::SimdOp::F64x2Gt: {
      swap = true;
      op = wasm::SimdOp::F64x2Lt;
      break;
    }
    case wasm::SimdOp::F64x2Ge: {
      swap = true;
      op = wasm::SimdOp::F64x2Le;
      break;
    }
    case wasm::SimdOp::F32x4PMin:
    case wasm::SimdOp::F32x4PMax:
    case wasm::SimdOp::F64x2PMin:
    case wasm::SimdOp::F64x2PMax: {
      // Code generation requires the operations to be reversed (the rhs is the
      // output register).
      swap = true;
      break;
    }
    default:
      break;
  }
  if (swap) {
    MDefinition* tmp = lhs;
    lhs = rhs;
    rhs = tmp;
  }

  // Allocate temp registers
  LDefinition tempReg0 = LDefinition::BogusTemp();
  LDefinition tempReg1 = LDefinition::BogusTemp();
  switch (op) {
    case wasm::SimdOp::I64x2Mul:
      tempReg0 = tempSimd128();
      break;
    case wasm::SimdOp::F32x4Min:
    case wasm::SimdOp::F32x4Max:
    case wasm::SimdOp::F64x2Min:
    case wasm::SimdOp::F64x2Max:
      tempReg0 = tempSimd128();
      tempReg1 = tempSimd128();
      break;
    case wasm::SimdOp::I64x2LtS:
    case wasm::SimdOp::I64x2GtS:
    case wasm::SimdOp::I64x2LeS:
    case wasm::SimdOp::I64x2GeS:
      // The compareForOrderingInt64x2AVX implementation does not require
      // temps but needs SSE4.2 support. Checking if both AVX and SSE4.2
      // are enabled.
      if (!(Assembler::HasAVX() && Assembler::HasSSE42())) {
        tempReg0 = tempSimd128();
        tempReg1 = tempSimd128();
      }
      break;
    default:
      break;
  }

  // For binary ops, without AVX support, the Masm API is usually
  // (rhs, lhsDest) and requires AtStart+ReuseInput for the lhs.
  //
  // For a few ops, the API is actually (rhsDest, lhs) and the rules are the
  // same but the reversed. We swapped operands above; they will be swapped
  // again in the code generator to emit the right code.
  //
  // If AVX support is enabled, some binary ops can use output as destination,
  // useRegisterAtStart is applied for both operands and no need for ReuseInput.

  switch (op) {
    case wasm::SimdOp::I8x16AvgrU:
    case wasm::SimdOp::I16x8AvgrU:
    case wasm::SimdOp::I8x16Add:
    case wasm::SimdOp::I8x16AddSatS:
    case wasm::SimdOp::I8x16AddSatU:
    case wasm::SimdOp::I8x16Sub:
    case wasm::SimdOp::I8x16SubSatS:
    case wasm::SimdOp::I8x16SubSatU:
    case wasm::SimdOp::I16x8Mul:
    case wasm::SimdOp::I16x8MinS:
    case wasm::SimdOp::I16x8MinU:
    case wasm::SimdOp::I16x8MaxS:
    case wasm::SimdOp::I16x8MaxU:
    case wasm::SimdOp::I32x4Add:
    case wasm::SimdOp::I32x4Sub:
    case wasm::SimdOp::I32x4Mul:
    case wasm::SimdOp::I32x4MinS:
    case wasm::SimdOp::I32x4MinU:
    case wasm::SimdOp::I32x4MaxS:
    case wasm::SimdOp::I32x4MaxU:
    case wasm::SimdOp::I64x2Add:
    case wasm::SimdOp::I64x2Sub:
    case wasm::SimdOp::I64x2Mul:
    case wasm::SimdOp::F32x4Add:
    case wasm::SimdOp::F32x4Sub:
    case wasm::SimdOp::F32x4Mul:
    case wasm::SimdOp::F32x4Div:
    case wasm::SimdOp::F64x2Add:
    case wasm::SimdOp::F64x2Sub:
    case wasm::SimdOp::F64x2Mul:
    case wasm::SimdOp::F64x2Div:
    case wasm::SimdOp::F32x4Eq:
    case wasm::SimdOp::F32x4Ne:
    case wasm::SimdOp::F32x4Lt:
    case wasm::SimdOp::F32x4Le:
    case wasm::SimdOp::F64x2Eq:
    case wasm::SimdOp::F64x2Ne:
    case wasm::SimdOp::F64x2Lt:
    case wasm::SimdOp::F64x2Le:
    case wasm::SimdOp::F32x4PMin:
    case wasm::SimdOp::F32x4PMax:
    case wasm::SimdOp::F64x2PMin:
    case wasm::SimdOp::F64x2PMax:
    case wasm::SimdOp::I8x16Swizzle:
    case wasm::SimdOp::I8x16RelaxedSwizzle:
    case wasm::SimdOp::I8x16Eq:
    case wasm::SimdOp::I8x16Ne:
    case wasm::SimdOp::I8x16GtS:
    case wasm::SimdOp::I8x16LeS:
    case wasm::SimdOp::I8x16LtU:
    case wasm::SimdOp::I8x16GtU:
    case wasm::SimdOp::I8x16LeU:
    case wasm::SimdOp::I8x16GeU:
    case wasm::SimdOp::I16x8Eq:
    case wasm::SimdOp::I16x8Ne:
    case wasm::SimdOp::I16x8GtS:
    case wasm::SimdOp::I16x8LeS:
    case wasm::SimdOp::I16x8LtU:
    case wasm::SimdOp::I16x8GtU:
    case wasm::SimdOp::I16x8LeU:
    case wasm::SimdOp::I16x8GeU:
    case wasm::SimdOp::I32x4Eq:
    case wasm::SimdOp::I32x4Ne:
    case wasm::SimdOp::I32x4GtS:
    case wasm::SimdOp::I32x4LeS:
    case wasm::SimdOp::I32x4LtU:
    case wasm::SimdOp::I32x4GtU:
    case wasm::SimdOp::I32x4LeU:
    case wasm::SimdOp::I32x4GeU:
    case wasm::SimdOp::I64x2Eq:
    case wasm::SimdOp::I64x2Ne:
    case wasm::SimdOp::I64x2LtS:
    case wasm::SimdOp::I64x2GtS:
    case wasm::SimdOp::I64x2LeS:
    case wasm::SimdOp::I64x2GeS:
    case wasm::SimdOp::V128And:
    case wasm::SimdOp::V128Or:
    case wasm::SimdOp::V128Xor:
    case wasm::SimdOp::V128AndNot:
    case wasm::SimdOp::F32x4Min:
    case wasm::SimdOp::F32x4Max:
    case wasm::SimdOp::F64x2Min:
    case wasm::SimdOp::F64x2Max:
    case wasm::SimdOp::I8x16NarrowI16x8S:
    case wasm::SimdOp::I8x16NarrowI16x8U:
    case wasm::SimdOp::I16x8NarrowI32x4S:
    case wasm::SimdOp::I16x8NarrowI32x4U:
    case wasm::SimdOp::I32x4DotI16x8S:
    case wasm::SimdOp::I16x8ExtmulLowI8x16S:
    case wasm::SimdOp::I16x8ExtmulHighI8x16S:
    case wasm::SimdOp::I16x8ExtmulLowI8x16U:
    case wasm::SimdOp::I16x8ExtmulHighI8x16U:
    case wasm::SimdOp::I32x4ExtmulLowI16x8S:
    case wasm::SimdOp::I32x4ExtmulHighI16x8S:
    case wasm::SimdOp::I32x4ExtmulLowI16x8U:
    case wasm::SimdOp::I32x4ExtmulHighI16x8U:
    case wasm::SimdOp::I64x2ExtmulLowI32x4S:
    case wasm::SimdOp::I64x2ExtmulHighI32x4S:
    case wasm::SimdOp::I64x2ExtmulLowI32x4U:
    case wasm::SimdOp::I64x2ExtmulHighI32x4U:
    case wasm::SimdOp::I16x8Q15MulrSatS:
    case wasm::SimdOp::F32x4RelaxedMin:
    case wasm::SimdOp::F32x4RelaxedMax:
    case wasm::SimdOp::F64x2RelaxedMin:
    case wasm::SimdOp::F64x2RelaxedMax:
    case wasm::SimdOp::I16x8RelaxedQ15MulrS:
    case wasm::SimdOp::I16x8RelaxedDotI8x16I7x16S:
    case wasm::SimdOp::MozPMADDUBSW:
      if (isThreeOpAllowed()) {
        // Three-address form: output is its own register, both inputs may
        // die at start.
        auto* lir = new (alloc())
            LWasmBinarySimd128(useRegisterAtStart(lhs), useRegisterAtStart(rhs),
                               tempReg0, tempReg1, op);
        define(lir, ins);
        break;
      }
      [[fallthrough]];
    default: {
      // Two-address form: lhs is tied to the output.
      LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
      LAllocation rhsAlloc = willHaveDifferentLIRNodes(lhs, rhs)
                                 ? useRegister(rhs)
                                 : useRegisterAtStart(rhs);
      auto* lir = new (alloc())
          LWasmBinarySimd128(lhsDestAlloc, rhsAlloc, tempReg0, tempReg1, op);
      defineReuseInput(lir, ins, LWasmBinarySimd128::LhsIndex);
      break;
    }
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

#ifdef ENABLE_WASM_SIMD
bool MWasmTernarySimd128::specializeBitselectConstantMaskAsShuffle(
    int8_t shuffle[16]) {
  if (simdOp() != wasm::SimdOp::V128Bitselect) {
    return false;
  }

  // Optimization when control vector is a mask with all 0 or all 1 per lane.
  // On x86, there is no bitselect, blend operations will be a win,
  // e.g. via PBLENDVB or PBLENDW.
  // The cast below is unchecked; presumably the caller has established that
  // v2() is a constant vector (MWasmFloatConstant) before calling this --
  // TODO(review): confirm at the call sites.
  SimdConstant constant = static_cast<MWasmFloatConstant*>(v2())->toSimd128();
  const SimdConstant::I8x16& bytes = constant.asInt8x16();
  for (int8_t i = 0; i < 16; i++) {
    if (bytes[i] == -1) {
      // All-ones lane: take byte i from the first operand.
      shuffle[i] = i;
    } else if (bytes[i] == 0) {
      // All-zeros lane: take byte i from the second operand.
      shuffle[i] = i + 16;
    } else {
      // Mixed-bit lane: not expressible as a byte shuffle.
      return false;
    }
  }
  return true;
}
// Returns true when the mask operand (v2) is the result of a SIMD comparison
// (possibly one with a constant rhs), i.e. every lane of the mask is known to
// be all-zeros or all-ones.
bool MWasmTernarySimd128::canRelaxBitselect() {
  wasm::SimdOp simdOp;
  if (v2()->isWasmBinarySimd128()) {
    simdOp = v2()->toWasmBinarySimd128()->simdOp();
  } else if (v2()->isWasmBinarySimd128WithConstant()) {
    simdOp = v2()->toWasmBinarySimd128WithConstant()->simdOp();
  } else {
    return false;
  }
  // Any comparison op qualifies: comparisons produce lane-pure masks.
  switch (simdOp) {
    case wasm::SimdOp::I8x16Eq:
    case wasm::SimdOp::I8x16Ne:
    case wasm::SimdOp::I8x16GtS:
    case wasm::SimdOp::I8x16GeS:
    case wasm::SimdOp::I8x16LtS:
    case wasm::SimdOp::I8x16LeS:
    case wasm::SimdOp::I8x16GtU:
    case wasm::SimdOp::I8x16GeU:
    case wasm::SimdOp::I8x16LtU:
    case wasm::SimdOp::I8x16LeU:
    case wasm::SimdOp::I16x8Eq:
    case wasm::SimdOp::I16x8Ne:
    case wasm::SimdOp::I16x8GtS:
    case wasm::SimdOp::I16x8GeS:
    case wasm::SimdOp::I16x8LtS:
    case wasm::SimdOp::I16x8LeS:
    case wasm::SimdOp::I16x8GtU:
    case wasm::SimdOp::I16x8GeU:
    case wasm::SimdOp::I16x8LtU:
    case wasm::SimdOp::I16x8LeU:
    case wasm::SimdOp::I32x4Eq:
    case wasm::SimdOp::I32x4Ne:
    case wasm::SimdOp::I32x4GtS:
    case wasm::SimdOp::I32x4GeS:
    case wasm::SimdOp::I32x4LtS:
    case wasm::SimdOp::I32x4LeS:
    case wasm::SimdOp::I32x4GtU:
    case wasm::SimdOp::I32x4GeU:
    case wasm::SimdOp::I32x4LtU:
    case wasm::SimdOp::I32x4LeU:
    case wasm::SimdOp::I64x2Eq:
    case wasm::SimdOp::I64x2Ne:
    case wasm::SimdOp::I64x2GtS:
    case wasm::SimdOp::I64x2GeS:
    case wasm::SimdOp::I64x2LtS:
    case wasm::SimdOp::I64x2LeS:
    case wasm::SimdOp::F32x4Eq:
    case wasm::SimdOp::F32x4Ne:
    case wasm::SimdOp::F32x4Gt:
    case wasm::SimdOp::F32x4Ge:
    case wasm::SimdOp::F32x4Lt:
    case wasm::SimdOp::F32x4Le:
    case wasm::SimdOp::F64x2Eq:
    case wasm::SimdOp::F64x2Ne:
    case wasm::SimdOp::F64x2Gt:
    case wasm::SimdOp::F64x2Ge:
    case wasm::SimdOp::F64x2Lt:
    case wasm::SimdOp::F64x2Le:
      return true;
    default:
      break;
  }
  return false;
}

bool MWasmBinarySimd128::canPmaddubsw() {
  // NOTE(review): PMADDUBSW is an SSSE3 instruction but the assert checks
  // HasSSE3(); confirm that SSE3 support implies SSSE3 availability in this
  // assembler's feature model.
  MOZ_ASSERT(Assembler::HasSSE3());
  return true;
}
#endif

// Returns true for binary ops whose constant rhs can be kept as an inline
// SimdConstant operand (lowered via LWasmBinarySimd128WithConstant) instead
// of being materialized into a register.
bool MWasmBinarySimd128::specializeForConstantRhs() {
  // The order follows MacroAssembler.h, generally
  switch (simdOp()) {
    // Operations implemented by a single native instruction where it is
    // plausible that the rhs (after commutation if available) could be a
    // constant.
    //
    // Swizzle is not here because it was handled earlier in the pipeline.
    //
    // Integer compares >= and < are not here because they are not supported in
    // the hardware.
    //
    // Floating compares are not here because our patching machinery can't
    // handle them yet.
    //
    // Floating-point min and max (including pmin and pmax) are not here because
    // they are not straightforward to implement.
    case wasm::SimdOp::I8x16Add:
    case wasm::SimdOp::I16x8Add:
    case wasm::SimdOp::I32x4Add:
    case wasm::SimdOp::I64x2Add:
    case wasm::SimdOp::I8x16Sub:
    case wasm::SimdOp::I16x8Sub:
    case wasm::SimdOp::I32x4Sub:
    case wasm::SimdOp::I64x2Sub:
    case wasm::SimdOp::I16x8Mul:
    case wasm::SimdOp::I32x4Mul:
    case wasm::SimdOp::I8x16AddSatS:
    case wasm::SimdOp::I8x16AddSatU:
    case wasm::SimdOp::I16x8AddSatS:
    case wasm::SimdOp::I16x8AddSatU:
    case wasm::SimdOp::I8x16SubSatS:
    case wasm::SimdOp::I8x16SubSatU:
    case wasm::SimdOp::I16x8SubSatS:
    case wasm::SimdOp::I16x8SubSatU:
    case wasm::SimdOp::I8x16MinS:
    case wasm::SimdOp::I8x16MinU:
    case wasm::SimdOp::I16x8MinS:
    case wasm::SimdOp::I16x8MinU:
    case wasm::SimdOp::I32x4MinS:
    case wasm::SimdOp::I32x4MinU:
    case wasm::SimdOp::I8x16MaxS:
    case wasm::SimdOp::I8x16MaxU:
    case wasm::SimdOp::I16x8MaxS:
    case wasm::SimdOp::I16x8MaxU:
    case wasm::SimdOp::I32x4MaxS:
    case wasm::SimdOp::I32x4MaxU:
    case wasm::SimdOp::V128And:
    case wasm::SimdOp::V128Or:
    case wasm::SimdOp::V128Xor:
    case wasm::SimdOp::I8x16Eq:
    case wasm::SimdOp::I8x16Ne:
    case wasm::SimdOp::I8x16GtS:
    case wasm::SimdOp::I8x16LeS:
    case wasm::SimdOp::I16x8Eq:
    case wasm::SimdOp::I16x8Ne:
    case wasm::SimdOp::I16x8GtS:
    case wasm::SimdOp::I16x8LeS:
    case wasm::SimdOp::I32x4Eq:
    case wasm::SimdOp::I32x4Ne:
    case wasm::SimdOp::I32x4GtS:
    case wasm::SimdOp::I32x4LeS:
    case wasm::SimdOp::I64x2Mul:
    case wasm::SimdOp::F32x4Eq:
    case wasm::SimdOp::F32x4Ne:
    case wasm::SimdOp::F32x4Lt:
    case wasm::SimdOp::F32x4Le:
    case wasm::SimdOp::F64x2Eq:
    case wasm::SimdOp::F64x2Ne:
    case wasm::SimdOp::F64x2Lt:
    case wasm::SimdOp::F64x2Le:
    case wasm::SimdOp::I32x4DotI16x8S:
    case wasm::SimdOp::F32x4Add:
    case wasm::SimdOp::F64x2Add:
    case wasm::SimdOp::F32x4Sub:
    case wasm::SimdOp::F64x2Sub:
    case wasm::SimdOp::F32x4Div:
    case wasm::SimdOp::F64x2Div:
    case wasm::SimdOp::F32x4Mul:
    case wasm::SimdOp::F64x2Mul:
    case wasm::SimdOp::I8x16NarrowI16x8S:
    case wasm::SimdOp::I8x16NarrowI16x8U:
    case wasm::SimdOp::I16x8NarrowI32x4S:
    case wasm::SimdOp::I16x8NarrowI32x4U:
      return true;
    default:
      return false;
  }
}

// Lowers a binary SIMD op whose rhs is an inline SimdConstant (selected by
// specializeForConstantRhs() above).  Only the lhs needs a register; the
// constant is carried on the LIR node itself.
void LIRGenerator::visitWasmBinarySimd128WithConstant(
    MWasmBinarySimd128WithConstant* ins) {
#ifdef ENABLE_WASM_SIMD
  MDefinition* lhs = ins->lhs();

  MOZ_ASSERT(lhs->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  // Allocate temp registers; only I64x2Mul needs a scratch vector.
  LDefinition tempReg = LDefinition::BogusTemp();
  switch (ins->simdOp()) {
    case wasm::SimdOp::I64x2Mul:
      tempReg = tempSimd128();
      break;
    default:
      break;
  }

  if (isThreeOpAllowed()) {
    // The non-destructive versions of instructions will be available
    // when AVX is enabled.
    LAllocation lhsAlloc = useRegisterAtStart(lhs);
    auto* lir = new (alloc())
        LWasmBinarySimd128WithConstant(lhsAlloc, tempReg, ins->rhs());
    define(lir, ins);
  } else {
    // Always beneficial to reuse the lhs register here, see discussion in
    // visitWasmBinarySimd128() and also code in specializeForConstantRhs().
    LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
    auto* lir = new (alloc())
        LWasmBinarySimd128WithConstant(lhsDestAlloc, tempReg, ins->rhs());
    defineReuseInput(lir, ins, LWasmBinarySimd128WithConstant::LhsIndex);
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

// Lowers a SIMD shift.  Constant shift counts are masked to the lane width
// and folded into the LIR node (with a sign-replication special case for
// shift-right-by-all-bits); variable counts go through a generic path that
// may need a scratch vector register.
void LIRGenerator::visitWasmShiftSimd128(MWasmShiftSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  MDefinition* lhs = ins->lhs();
  MDefinition* rhs = ins->rhs();

  MOZ_ASSERT(lhs->type() == MIRType::Simd128);
  MOZ_ASSERT(rhs->type() == MIRType::Int32);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  if (rhs->isConstant()) {
    // Per wasm semantics the count is taken mod the lane width.
    int32_t shiftCountMask;
    switch (ins->simdOp()) {
      case wasm::SimdOp::I8x16Shl:
      case wasm::SimdOp::I8x16ShrU:
      case wasm::SimdOp::I8x16ShrS:
        shiftCountMask = 7;
        break;
      case wasm::SimdOp::I16x8Shl:
      case wasm::SimdOp::I16x8ShrU:
      case wasm::SimdOp::I16x8ShrS:
        shiftCountMask = 15;
        break;
      case wasm::SimdOp::I32x4Shl:
      case wasm::SimdOp::I32x4ShrU:
      case wasm::SimdOp::I32x4ShrS:
        shiftCountMask = 31;
        break;
      case wasm::SimdOp::I64x2Shl:
      case wasm::SimdOp::I64x2ShrU:
      case wasm::SimdOp::I64x2ShrS:
        shiftCountMask = 63;
        break;
      default:
        MOZ_CRASH("Unexpected shift operation");
    }

    int32_t shiftCount = rhs->toConstant()->toInt32() & shiftCountMask;
    if (shiftCount == shiftCountMask) {
      // Check if possible to apply sign replication optimization.
      // For some ops the input shall be reused.
      switch (ins->simdOp()) {
        case wasm::SimdOp::I8x16ShrS: {
          auto* lir =
              new (alloc()) LWasmSignReplicationSimd128(useRegister(lhs));
          define(lir, ins);
          return;
        }
        case wasm::SimdOp::I16x8ShrS:
        case wasm::SimdOp::I32x4ShrS:
        case wasm::SimdOp::I64x2ShrS: {
          auto* lir = new (alloc())
              LWasmSignReplicationSimd128(useRegisterAtStart(lhs));
          if (isThreeOpAllowed()) {
            define(lir, ins);
          } else {
            // For non-AVX, it is always beneficial to reuse the input.
            defineReuseInput(lir, ins, LWasmSignReplicationSimd128::SrcIndex);
          }
          return;
        }
        default:
          break;
      }
    }

# ifdef DEBUG
    js::wasm::ReportSimdAnalysis("shift -> constant shift");
# endif
    auto* lir = new (alloc())
        LWasmConstantShiftSimd128(useRegisterAtStart(lhs), shiftCount);
    if (isThreeOpAllowed()) {
      define(lir, ins);
    } else {
      // For non-AVX, it is always beneficial to reuse the input.
      defineReuseInput(lir, ins, LWasmConstantShiftSimd128::SrcIndex);
    }
    return;
  }

# ifdef DEBUG
  js::wasm::ReportSimdAnalysis("shift -> variable shift");
# endif

  // Variable-count path: some ops need a scratch vector register.
  LDefinition tempReg = LDefinition::BogusTemp();
  switch (ins->simdOp()) {
    case wasm::SimdOp::I8x16Shl:
    case wasm::SimdOp::I8x16ShrS:
    case wasm::SimdOp::I8x16ShrU:
    case wasm::SimdOp::I64x2ShrS:
      tempReg = tempSimd128();
      break;
    default:
      break;
  }

  // Reusing the input if possible is never detrimental.
  LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
  LAllocation rhsAlloc = useRegisterAtStart(rhs);
  auto* lir =
      new (alloc()) LWasmVariableShiftSimd128(lhsDestAlloc, rhsAlloc, tempReg);
  defineReuseInput(lir, ins, LWasmVariableShiftSimd128::LhsIndex);
#else
  MOZ_CRASH("No SIMD");
#endif
}

// Lowers a shuffle.  Shuffle analysis (done earlier) classifies it as either
// a single-operand permute (Operand::LEFT/RIGHT) or a genuine two-operand
// shuffle (Operand::BOTH, possibly with operands swapped).
void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->rhs()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  SimdShuffle s = ins->shuffle();
  switch (s.opd) {
    case SimdShuffle::Operand::LEFT:
    case SimdShuffle::Operand::RIGHT: {
      // Single-operand permute.  Whether the source register is reused as
      // the destination depends on the permute op and on AVX availability.
      LAllocation src;
      bool reuse = false;
      switch (*s.permuteOp) {
        case SimdPermuteOp::MOVE:
          reuse = true;
          break;
        case SimdPermuteOp::BROADCAST_8x16:
        case SimdPermuteOp::BROADCAST_16x8:
        case SimdPermuteOp::PERMUTE_8x16:
        case SimdPermuteOp::PERMUTE_16x8:
        case SimdPermuteOp::PERMUTE_32x4:
        case SimdPermuteOp::ROTATE_RIGHT_8x16:
        case SimdPermuteOp::SHIFT_LEFT_8x16:
        case SimdPermuteOp::SHIFT_RIGHT_8x16:
        case SimdPermuteOp::REVERSE_16x8:
        case SimdPermuteOp::REVERSE_32x4:
        case SimdPermuteOp::REVERSE_64x2:
        case SimdPermuteOp::ZERO_EXTEND_8x16_TO_16x8:
        case SimdPermuteOp::ZERO_EXTEND_8x16_TO_32x4:
        case SimdPermuteOp::ZERO_EXTEND_8x16_TO_64x2:
        case SimdPermuteOp::ZERO_EXTEND_16x8_TO_32x4:
        case SimdPermuteOp::ZERO_EXTEND_16x8_TO_64x2:
        case SimdPermuteOp::ZERO_EXTEND_32x4_TO_64x2:
          // No need to reuse registers when VEX instructions are enabled.
          reuse = !Assembler::HasAVX();
          break;
        default:
          MOZ_CRASH("Unexpected operator");
      }
      if (s.opd == SimdShuffle::Operand::LEFT) {
        src = useRegisterAtStart(ins->lhs());
      } else {
        src = useRegisterAtStart(ins->rhs());
      }
      auto* lir =
          new (alloc()) LWasmPermuteSimd128(src, *s.permuteOp, s.control);
      if (reuse) {
        defineReuseInput(lir, ins, LWasmPermuteSimd128::SrcIndex);
      } else {
        define(lir, ins);
      }
      break;
    }
    case SimdShuffle::Operand::BOTH:
    case SimdShuffle::Operand::BOTH_SWAPPED: {
      // BLEND_8x16 needs a scratch; without AVX it must be xmm0 (the
      // implicit mask register of PBLENDVB).
      LDefinition temp = LDefinition::BogusTemp();
      switch (*s.shuffleOp) {
        case SimdShuffleOp::BLEND_8x16:
          temp = Assembler::HasAVX() ? tempSimd128() : tempFixed(xmm0);
          break;
        default:
          break;
      }
      if (isThreeOpAllowed()) {
        LAllocation lhs;
        LAllocation rhs;
        if (s.opd == SimdShuffle::Operand::BOTH) {
          lhs = useRegisterAtStart(ins->lhs());
          rhs = useRegisterAtStart(ins->rhs());
        } else {
          // BOTH_SWAPPED: present the operands in swapped order.
          lhs = useRegisterAtStart(ins->rhs());
          rhs = useRegisterAtStart(ins->lhs());
        }
        auto* lir = new (alloc())
            LWasmShuffleSimd128(lhs, rhs, temp, *s.shuffleOp, s.control);
        define(lir, ins);
      } else {
        LAllocation lhs;
        LAllocation rhs;
        if (s.opd == SimdShuffle::Operand::BOTH) {
          lhs = useRegisterAtStart(ins->lhs());
          rhs = useRegister(ins->rhs());
        } else {
          lhs = useRegisterAtStart(ins->rhs());
          rhs = useRegister(ins->lhs());
        }
        auto* lir = new (alloc())
            LWasmShuffleSimd128(lhs, rhs, temp, *s.shuffleOp, s.control);
        defineReuseInput(lir, ins, LWasmShuffleSimd128::LhsIndex);
      }
      break;
    }
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

void LIRGenerator::visitWasmReplaceLaneSimd128(MWasmReplaceLaneSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  // If AVX support is disabled, the Masm API is (rhs, lhsDest) and requires
  // AtStart+ReuseInput for the lhs. For type reasons, the rhs will never be
  // the same as the lhs and is therefore a plain Use.
  //
  // If AVX support is enabled, useRegisterAtStart is preferred.

  if (ins->rhs()->type() == MIRType::Int64) {
    // 64-bit lane value: needs the Int64 register variants.
    if (isThreeOpAllowed()) {
      auto* lir = new (alloc()) LWasmReplaceInt64LaneSimd128(
          useRegisterAtStart(ins->lhs()), useInt64RegisterAtStart(ins->rhs()));
      define(lir, ins);
    } else {
      auto* lir = new (alloc()) LWasmReplaceInt64LaneSimd128(
          useRegisterAtStart(ins->lhs()), useInt64Register(ins->rhs()));
      defineReuseInput(lir, ins, LWasmReplaceInt64LaneSimd128::LhsIndex);
    }
  } else {
    if (isThreeOpAllowed()) {
      auto* lir = new (alloc()) LWasmReplaceLaneSimd128(
          useRegisterAtStart(ins->lhs()), useRegisterAtStart(ins->rhs()));
      define(lir, ins);
    } else {
      auto* lir = new (alloc()) LWasmReplaceLaneSimd128(
          useRegisterAtStart(ins->lhs()), useRegister(ins->rhs()));
      defineReuseInput(lir, ins, LWasmReplaceLaneSimd128::LhsIndex);
    }
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

// Lowers a scalar->vector splat; dispatch is on the scalar input's type.
void LIRGenerator::visitWasmScalarToSimd128(MWasmScalarToSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  switch (ins->input()->type()) {
    case MIRType::Int64: {
      // 64-bit integer splats.
      // Load-and-(sign|zero)extend.
      auto* lir = new (alloc())
          LWasmInt64ToSimd128(useInt64RegisterAtStart(ins->input()));
      define(lir, ins);
      break;
    }
    case MIRType::Float32:
    case MIRType::Double: {
      // Floating-point splats.
      // Ideally we save a move on SSE systems by reusing the input register,
      // but since the input and output register types differ, we can't.
      auto* lir =
          new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input()));
      define(lir, ins);
      break;
    }
    default: {
      // 32-bit integer splats.
      auto* lir =
          new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input()));
      define(lir, ins);
      break;
    }
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

// Lowers a unary SIMD op.  The switch classifies each op by its register
// policy: whether the input should be consumed AtStart, whether the output
// should reuse the input register (non-AVX only), and whether a scratch
// vector register is required.
void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  MOZ_ASSERT(ins->input()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  bool useAtStart = false;
  bool reuseInput = false;
  LDefinition tempReg = LDefinition::BogusTemp();
  switch (ins->simdOp()) {
    case wasm::SimdOp::I8x16Neg:
    case wasm::SimdOp::I16x8Neg:
    case wasm::SimdOp::I32x4Neg:
    case wasm::SimdOp::I64x2Neg:
    case wasm::SimdOp::I16x8ExtaddPairwiseI8x16S:
      // Prefer src != dest to avoid an unconditional src->temp move.
      MOZ_ASSERT(!reuseInput);
      // If AVX is enabled, we prefer useRegisterAtStart.
      useAtStart = isThreeOpAllowed();
      break;
    case wasm::SimdOp::F32x4Neg:
    case wasm::SimdOp::F64x2Neg:
    case wasm::SimdOp::F32x4Abs:
    case wasm::SimdOp::F64x2Abs:
    case wasm::SimdOp::V128Not:
    case wasm::SimdOp::F32x4Sqrt:
    case wasm::SimdOp::F64x2Sqrt:
    case wasm::SimdOp::I8x16Abs:
    case wasm::SimdOp::I16x8Abs:
    case wasm::SimdOp::I32x4Abs:
    case wasm::SimdOp::I64x2Abs:
    case wasm::SimdOp::I32x4TruncSatF32x4S:
    case wasm::SimdOp::F32x4ConvertI32x4U:
    case wasm::SimdOp::I16x8ExtaddPairwiseI8x16U:
    case wasm::SimdOp::I32x4ExtaddPairwiseI16x8S:
    case wasm::SimdOp::I32x4ExtaddPairwiseI16x8U:
    case wasm::SimdOp::I32x4RelaxedTruncF32x4S:
    case wasm::SimdOp::I32x4RelaxedTruncF32x4U:
    case wasm::SimdOp::I32x4RelaxedTruncF64x2SZero:
    case wasm::SimdOp::I32x4RelaxedTruncF64x2UZero:
    case wasm::SimdOp::I64x2ExtendHighI32x4S:
    case wasm::SimdOp::I64x2ExtendHighI32x4U:
      // Prefer src == dest to avoid an unconditional src->dest move
      // for better performance in non-AVX mode (e.g. non-PSHUFD use).
      useAtStart = true;
      reuseInput = !isThreeOpAllowed();
      break;
    case wasm::SimdOp::I32x4TruncSatF32x4U:
    case wasm::SimdOp::I32x4TruncSatF64x2SZero:
    case wasm::SimdOp::I32x4TruncSatF64x2UZero:
    case wasm::SimdOp::I8x16Popcnt:
      tempReg = tempSimd128();
      // Prefer src == dest to avoid an unconditional src->dest move
      // in non-AVX mode.
      useAtStart = true;
      reuseInput = !isThreeOpAllowed();
      break;
    case wasm::SimdOp::I16x8ExtendLowI8x16S:
    case wasm::SimdOp::I16x8ExtendHighI8x16S:
    case wasm::SimdOp::I16x8ExtendLowI8x16U:
    case wasm::SimdOp::I16x8ExtendHighI8x16U:
    case wasm::SimdOp::I32x4ExtendLowI16x8S:
    case wasm::SimdOp::I32x4ExtendHighI16x8S:
    case wasm::SimdOp::I32x4ExtendLowI16x8U:
    case wasm::SimdOp::I32x4ExtendHighI16x8U:
    case wasm::SimdOp::I64x2ExtendLowI32x4S:
    case wasm::SimdOp::I64x2ExtendLowI32x4U:
    case wasm::SimdOp::F32x4ConvertI32x4S:
    case wasm::SimdOp::F32x4Ceil:
    case wasm::SimdOp::F32x4Floor:
    case wasm::SimdOp::F32x4Trunc:
    case wasm::SimdOp::F32x4Nearest:
    case wasm::SimdOp::F64x2Ceil:
    case wasm::SimdOp::F64x2Floor:
    case wasm::SimdOp::F64x2Trunc:
    case wasm::SimdOp::F64x2Nearest:
    case wasm::SimdOp::F32x4DemoteF64x2Zero:
    case wasm::SimdOp::F64x2PromoteLowF32x4:
    case wasm::SimdOp::F64x2ConvertLowI32x4S:
    case wasm::SimdOp::F64x2ConvertLowI32x4U:
      // Prefer src == dest to exert the lowest register pressure on the
      // surrounding code.
      useAtStart = true;
      MOZ_ASSERT(!reuseInput);
      break;
    default:
      MOZ_CRASH("Unary SimdOp not implemented");
  }

  LUse inputUse =
      useAtStart ? useRegisterAtStart(ins->input()) : useRegister(ins->input());
  LWasmUnarySimd128* lir = new (alloc()) LWasmUnarySimd128(inputUse, tempReg);
  if (reuseInput) {
    defineReuseInput(lir, ins, LWasmUnarySimd128::SrcIndex);
  } else {
    define(lir, ins);
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

void LIRGenerator::visitWasmLoadLaneSimd128(MWasmLoadLaneSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  // A trick: On 32-bit systems, the base pointer is 32 bits (it was bounds
  // checked and then chopped). On 64-bit systems, it can be 32 bits or 64
  // bits. Either way, it fits in a GPR so we can ignore the
  // Register/Register64 distinction here.
# ifndef JS_64BIT
  MOZ_ASSERT(ins->base()->type() == MIRType::Int32);
# endif
  LUse base = useRegisterAtStart(ins->base());
  LUse inputUse = useRegisterAtStart(ins->value());
  LAllocation memoryBase = ins->hasMemoryBase()
                               ? useRegisterAtStart(ins->memoryBase())
                               : LAllocation();
  auto* lir = new (alloc()) LWasmLoadLaneSimd128(base, inputUse, memoryBase);
  // The loaded lane is merged into the existing vector value, so the output
  // reuses the vector input's register.
  defineReuseInput(lir, ins, LWasmLoadLaneSimd128::SrcIndex);
#else
  MOZ_CRASH("No SIMD");
#endif
}

void LIRGenerator::visitWasmStoreLaneSimd128(MWasmStoreLaneSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  // See comment above.
# ifndef JS_64BIT
  MOZ_ASSERT(ins->base()->type() == MIRType::Int32);
# endif
  LUse base = useRegisterAtStart(ins->base());
  LUse input = useRegisterAtStart(ins->value());
  LAllocation memoryBase = ins->hasMemoryBase()
                               ? useRegisterAtStart(ins->memoryBase())
                               : LAllocation();
  auto* lir = new (alloc()) LWasmStoreLaneSimd128(base, input, memoryBase);
  // A store defines no value; just add the instruction.
  add(lir, ins);
#else
  MOZ_CRASH("No SIMD");
#endif
}

#ifdef ENABLE_WASM_SIMD

// Reductions in this list can be fused with a following branch, comparing
// and branching in one LIR node (see canEmitWasmReduceSimd128AtUses below).
bool LIRGeneratorX86Shared::canFoldReduceSimd128AndBranch(wasm::SimdOp op) {
  switch (op) {
    case wasm::SimdOp::V128AnyTrue:
    case wasm::SimdOp::I8x16AllTrue:
    case wasm::SimdOp::I16x8AllTrue:
    case wasm::SimdOp::I32x4AllTrue:
    case wasm::SimdOp::I64x2AllTrue:
    case wasm::SimdOp::I16x8Bitmask:
      return true;
    default:
      return false;
  }
}

// Decides whether an int32-producing reduction should be deferred and
// emitted at its use site, which requires that its single consumer (if any)
// be an MTest so the reduce+branch fusion applies.
bool LIRGeneratorX86Shared::canEmitWasmReduceSimd128AtUses(
    MWasmReduceSimd128* ins) {
  if (!ins->canEmitAtUses()) {
    return false;
  }
  // Only specific ops generating int32.
  if (ins->type() != MIRType::Int32) {
    return false;
  }
  if (!canFoldReduceSimd128AndBranch(ins->simdOp())) {
    return false;
  }
  // If never used then defer (it will be removed).
  MUseIterator iter(ins->usesBegin());
  if (iter == ins->usesEnd()) {
    return true;
  }
  // We require an MTest consumer.
  MNode* node = iter->consumer();
  if (!node->isDefinition() || !node->toDefinition()->isTest()) {
    return false;
  }
  // Defer only if there's only one use.
  iter++;
  return iter == ins->usesEnd();
}

#endif  // ENABLE_WASM_SIMD

void LIRGenerator::visitWasmReduceSimd128(MWasmReduceSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  if (canEmitWasmReduceSimd128AtUses(ins)) {
    emitAtUses(ins);
    return;
  }

  // Reductions (any_true, all_true, bitmask, extract_lane) uniformly prefer
  // useRegisterAtStart:
  //
  // - In most cases, the input type differs from the output type, so there's no
  //   conflict and it doesn't really matter.
  //
  // - For extract_lane(0) on F32x4 and F64x2, input == output results in zero
  //   code being generated.
  //
  // - For extract_lane(k > 0) on F32x4 and F64x2, allowing the input register
  //   to be targeted lowers register pressure if it's the last use of the
  //   input.

  if (ins->type() == MIRType::Int64) {
    auto* lir = new (alloc())
        LWasmReduceSimd128ToInt64(useRegisterAtStart(ins->input()));
    defineInt64(lir, ins);
  } else {
    // Ideally we would reuse the input register for floating extract_lane if
    // the lane is zero, but constraints in the register allocator require the
    // input and output register types to be the same.
    auto* lir =
        new (alloc()) LWasmReduceSimd128(useRegisterAtStart(ins->input()));
    define(lir, ins);
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}