CodeGenerator-x86-shared.cpp (121459B)
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "jit/x86-shared/CodeGenerator-x86-shared.h"

#include "mozilla/DebugOnly.h"
#include "mozilla/MathAlgorithms.h"

#include "jit/CodeGenerator.h"
#include "jit/InlineScriptTree.h"
#include "jit/JitRuntime.h"
#include "jit/RangeAnalysis.h"
#include "jit/ReciprocalMulConstants.h"
#include "js/ScalarType.h"  // js::Scalar::Type

#include "jit/MacroAssembler-inl.h"
#include "jit/shared/CodeGenerator-shared-inl.h"

using namespace js;
using namespace js::jit;

using mozilla::Abs;
using mozilla::DebugOnly;
using mozilla::FloorLog2;
using mozilla::NegativeInfinity;

using JS::GenericNaN;

namespace js {
namespace jit {

CodeGeneratorX86Shared::CodeGeneratorX86Shared(
    MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm,
    const wasm::CodeMetadata* wasmCodeMeta)
    : CodeGeneratorShared(gen, graph, masm, wasmCodeMeta) {}

// With JS_PUNBOX64 an int64 value occupies a single allocation, so it can be
// handed out as a memory/register Operand; on 32-bit targets it is a register
// pair and must be returned as a Register64.
#ifdef JS_PUNBOX64
Operand CodeGeneratorX86Shared::ToOperandOrRegister64(
    const LInt64Allocation& input) {
  return ToOperand(input.value());
}
#else
Register64 CodeGeneratorX86Shared::ToOperandOrRegister64(
    const LInt64Allocation& input) {
  return ToRegister64(input);
}
#endif

// Emit a two-way branch on |cond|. When the false block is the next block in
// emission order we emit only the true-branch and fall through; otherwise we
// branch to the false block on the inverted condition and then jump to the
// true block.
void CodeGeneratorX86Shared::emitBranch(Assembler::Condition cond,
                                        MBasicBlock* mirTrue,
                                        MBasicBlock* mirFalse) {
  if (isNextBlock(mirFalse->lir())) {
    jumpToBlock(mirTrue, cond);
  } else {
    jumpToBlock(mirFalse, Assembler::InvertCondition(cond));
    jumpToBlock(mirTrue);
  }
}

// Like the overload above, but for double conditions: the NaN (parity-flag)
// case is dispatched first according to |ifNaN|, then the remaining integer
// condition is branched on.
void CodeGeneratorX86Shared::emitBranch(Assembler::DoubleCondition cond,
                                        MBasicBlock* ifTrue,
                                        MBasicBlock* ifFalse,
                                        Assembler::NaNCond ifNaN) {
  if (ifNaN == Assembler::NaN_IsFalse) {
    jumpToBlock(ifFalse, Assembler::Parity);
  } else if (ifNaN == Assembler::NaN_IsTrue) {
    jumpToBlock(ifTrue, Assembler::Parity);
  }
  emitBranch(Assembler::ConditionFromDoubleCondition(cond), ifTrue, ifFalse);
}

void CodeGenerator::visitTestDAndBranch(LTestDAndBranch* test) {
  const LAllocation* opd = test->input();

  // vucomisd flags:
  //             Z  P  C
  //            ---------
  //      NaN    1  1  1
  //        >    0  0  0
  //        <    0  0  1
  //        =    1  0  0
  //
  // NaN is falsey, so comparing against 0 and then using the Z flag is
  // enough to determine which branch to take.
  ScratchDoubleScope scratch(masm);
  masm.zeroDouble(scratch);
  masm.vucomisd(scratch, ToFloatRegister(opd));
  emitBranch(Assembler::NotEqual, test->ifTrue(), test->ifFalse());
}

void CodeGenerator::visitTestFAndBranch(LTestFAndBranch* test) {
  const LAllocation* opd = test->input();
  // vucomiss flags are the same as doubles; see comment above
  {
    ScratchFloat32Scope scratch(masm);
    masm.zeroFloat32(scratch);
    masm.vucomiss(scratch, ToFloatRegister(opd));
  }
  emitBranch(Assembler::NotEqual, test->ifTrue(), test->ifFalse());
}

// Map a JSOp comparison to a DoubleCondition. When both operands are the same
// register, reflexive comparisons (==, <=, >=) only depend on whether the
// value is ordered (i.e. not NaN), and != on whether it is unordered, so they
// are strength-reduced to ordered/unordered checks.
static Assembler::DoubleCondition ToDoubleCondition(FloatRegister lhs,
                                                    FloatRegister rhs,
                                                    JSOp op) {
  if (lhs == rhs) {
    switch (op) {
      case JSOp::Eq:
      case JSOp::StrictEq:
      case JSOp::Le:
      case JSOp::Ge:
        return Assembler::DoubleOrdered;
      case JSOp::Ne:
      case JSOp::StrictNe:
        return Assembler::DoubleUnordered;
      default:
        break;
    }
  }
  return JSOpToDoubleCondition(op);
}

void CodeGenerator::visitCompareD(LCompareD* comp) {
  FloatRegister lhs = ToFloatRegister(comp->left());
  FloatRegister rhs = ToFloatRegister(comp->right());
  Register output = ToRegister(comp->output());

  Assembler::DoubleCondition cond =
      ToDoubleCondition(lhs, rhs, comp->mir()->jsop());

  Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond);
  if (comp->mir()->operandsAreNeverNaN()) {
    // Range analysis proved no NaN input, so no parity fixup is needed.
    nanCond = Assembler::NaN_HandledByCond;
  }

  bool destIsZero = masm.maybeEmitSetZeroByteRegister(output);
  masm.compareDouble(cond, lhs, rhs);
  masm.emitSet(Assembler::ConditionFromDoubleCondition(cond), output,
               destIsZero, nanCond);
}

void CodeGenerator::visitCompareF(LCompareF* comp) {
  FloatRegister lhs = ToFloatRegister(comp->left());
  FloatRegister rhs = ToFloatRegister(comp->right());
  Register output = ToRegister(comp->output());

  Assembler::DoubleCondition cond =
      ToDoubleCondition(lhs, rhs, comp->mir()->jsop());

  Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond);
  if (comp->mir()->operandsAreNeverNaN()) {
    nanCond = Assembler::NaN_HandledByCond;
  }

  bool destIsZero = masm.maybeEmitSetZeroByteRegister(output);
  masm.compareFloat(cond, lhs, rhs);
  masm.emitSet(Assembler::ConditionFromDoubleCondition(cond), output,
               destIsZero, nanCond);
}

void CodeGenerator::visitNotD(LNotD* ins) {
  FloatRegister opd = ToFloatRegister(ins->input());
  Register output = ToRegister(ins->output());

  // Not returns true if the input is a NaN. We don't have to worry about
  // it if we know the input is never NaN though.
  Assembler::NaNCond nanCond = Assembler::NaN_IsTrue;
  if (ins->mir()->operandIsNeverNaN()) {
    nanCond = Assembler::NaN_HandledByCond;
  }

  bool destIsZero = masm.maybeEmitSetZeroByteRegister(output);
  ScratchDoubleScope scratch(masm);
  masm.zeroDouble(scratch);
  masm.compareDouble(Assembler::DoubleEqualOrUnordered, opd, scratch);
  masm.emitSet(Assembler::Equal, output, destIsZero, nanCond);
}

void CodeGenerator::visitNotF(LNotF* ins) {
  FloatRegister opd = ToFloatRegister(ins->input());
  Register output = ToRegister(ins->output());

  // Not returns true if the input is a NaN. We don't have to worry about
  // it if we know the input is never NaN though.
  Assembler::NaNCond nanCond = Assembler::NaN_IsTrue;
  if (ins->mir()->operandIsNeverNaN()) {
    nanCond = Assembler::NaN_HandledByCond;
  }

  bool destIsZero = masm.maybeEmitSetZeroByteRegister(output);
  ScratchFloat32Scope scratch(masm);
  masm.zeroFloat32(scratch);
  masm.compareFloat(Assembler::DoubleEqualOrUnordered, opd, scratch);
  masm.emitSet(Assembler::Equal, output, destIsZero, nanCond);
}

void CodeGenerator::visitCompareDAndBranch(LCompareDAndBranch* comp) {
  FloatRegister lhs = ToFloatRegister(comp->left());
  FloatRegister rhs = ToFloatRegister(comp->right());

  Assembler::DoubleCondition cond =
      ToDoubleCondition(lhs, rhs, comp->cmpMir()->jsop());

  Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond);
  if (comp->cmpMir()->operandsAreNeverNaN()) {
    nanCond = Assembler::NaN_HandledByCond;
  }

  masm.compareDouble(cond, lhs, rhs);
  emitBranch(cond, comp->ifTrue(), comp->ifFalse(), nanCond);
}

void CodeGenerator::visitCompareFAndBranch(LCompareFAndBranch* comp) {
  FloatRegister lhs = ToFloatRegister(comp->left());
  FloatRegister rhs = ToFloatRegister(comp->right());

  Assembler::DoubleCondition cond =
      ToDoubleCondition(lhs, rhs, comp->cmpMir()->jsop());

  Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond);
  if (comp->cmpMir()->operandsAreNeverNaN()) {
    nanCond = Assembler::NaN_HandledByCond;
  }

  masm.compareFloat(cond, lhs, rhs);
  emitBranch(cond, comp->ifTrue(), comp->ifFalse(), nanCond);
}

// Store an outgoing wasm call argument into its stack slot at the given
// offset from the stack pointer.
void CodeGenerator::visitWasmStackArg(LWasmStackArg* ins) {
  const MWasmStackArg* mir = ins->mir();
  Address dst(StackPointer, mir->spOffset());
  if (ins->arg()->isConstant()) {
    masm.storePtr(ImmWord(ToInt32(ins->arg())), dst);
  } else if (ins->arg()->isGeneralReg()) {
    masm.storePtr(ToRegister(ins->arg()), dst);
  } else {
    switch (mir->input()->type()) {
      case MIRType::Double:
        masm.storeDouble(ToFloatRegister(ins->arg()), dst);
        return;
      case MIRType::Float32:
        masm.storeFloat32(ToFloatRegister(ins->arg()), dst);
        return;
#ifdef ENABLE_WASM_SIMD
      case MIRType::Simd128:
        masm.storeUnalignedSimd128(ToFloatRegister(ins->arg()), dst);
        return;
#endif
      default:
        break;
    }
    MOZ_CRASH("unexpected mir type in WasmStackArg");
  }
}

void CodeGenerator::visitWasmStackArgI64(LWasmStackArgI64* ins) {
  const MWasmStackArg* mir = ins->mir();
  Address dst(StackPointer, mir->spOffset());
  if (IsConstant(ins->arg())) {
    masm.store64(Imm64(ToInt64(ins->arg())), dst);
  } else {
    masm.store64(ToRegister64(ins->arg()), dst);
  }
}

// Wasm select: the true operand is register-allocated into the output, so we
// only need to conditionally overwrite the output with the false operand when
// the condition is zero (cmov for GPRs, branch-and-load for FP/SIMD values).
void CodeGenerator::visitWasmSelect(LWasmSelect* ins) {
  MIRType mirType = ins->mir()->type();

  Register cond = ToRegister(ins->condExpr());
  Operand falseExpr = ToOperand(ins->falseExpr());

  masm.test32(cond, cond);

  if (mirType == MIRType::Int32 || mirType == MIRType::WasmAnyRef) {
    Register out = ToRegister(ins->output());
    MOZ_ASSERT(ToRegister(ins->trueExpr()) == out,
               "true expr input is reused for output");
    if (mirType == MIRType::Int32) {
      masm.cmovz32(falseExpr, out);
    } else {
      masm.cmovzPtr(falseExpr, out);
    }
    return;
  }

  FloatRegister out = ToFloatRegister(ins->output());
  MOZ_ASSERT(ToFloatRegister(ins->trueExpr()) == out,
             "true expr input is reused for output");

  Label done;
  masm.j(Assembler::NonZero, &done);

  if (mirType == MIRType::Float32) {
    if (falseExpr.kind() == Operand::FPREG) {
      masm.moveFloat32(ToFloatRegister(ins->falseExpr()), out);
    } else {
      masm.loadFloat32(falseExpr, out);
    }
  } else if (mirType == MIRType::Double) {
    if (falseExpr.kind() == Operand::FPREG) {
      masm.moveDouble(ToFloatRegister(ins->falseExpr()), out);
    } else {
      masm.loadDouble(falseExpr, out);
    }
  } else if (mirType == MIRType::Simd128) {
    if (falseExpr.kind() == Operand::FPREG) {
      masm.moveSimd128(ToFloatRegister(ins->falseExpr()), out);
    } else {
      masm.loadUnalignedSimd128(falseExpr, out);
    }
  } else {
    MOZ_CRASH("unhandled type in visitWasmSelect!");
  }

  masm.bind(&done);
}

// asm.js heap load: out-of-bounds accesses do not trap; the out-of-line path
// instead produces NaN (float loads) or 0 (integer loads) and rejoins.
void CodeGenerator::visitAsmJSLoadHeap(LAsmJSLoadHeap* ins) {
  const MAsmJSLoadHeap* mir = ins->mir();
  MOZ_ASSERT(mir->access().offset64() == 0);

  const LAllocation* ptr = ins->ptr();
  const LAllocation* boundsCheckLimit = ins->boundsCheckLimit();
  AnyRegister out = ToAnyRegister(ins->output());

  Scalar::Type accessType = mir->accessType();

  OutOfLineCode* ool = nullptr;
  if (mir->needsBoundsCheck()) {
    ool = new (alloc()) LambdaOutOfLineCode([=, this](OutOfLineCode& ool) {
      switch (accessType) {
        case Scalar::Int64:
        case Scalar::BigInt64:
        case Scalar::BigUint64:
        case Scalar::Simd128:
        case Scalar::Float16:
        case Scalar::MaxTypedArrayViewType:
          MOZ_CRASH("unexpected array type");
        case Scalar::Float32:
          masm.loadConstantFloat32(float(GenericNaN()), out.fpu());
          break;
        case Scalar::Float64:
          masm.loadConstantDouble(GenericNaN(), out.fpu());
          break;
        case Scalar::Int8:
        case Scalar::Uint8:
        case Scalar::Int16:
        case Scalar::Uint16:
        case Scalar::Int32:
        case Scalar::Uint32:
        case Scalar::Uint8Clamped:
          Register destReg = out.gpr();
          masm.mov(ImmWord(0), destReg);
          break;
      }
      masm.jmp(ool.rejoin());
    });
    addOutOfLineCode(ool, mir);

    masm.wasmBoundsCheck32(Assembler::AboveOrEqual, ToRegister(ptr),
                           ToRegister(boundsCheckLimit), ool->entry());
  }

  Operand srcAddr = toMemoryAccessOperand(ins, 0);
  masm.wasmLoad(mir->access(), srcAddr, out);

  if (ool) {
    masm.bind(ool->rejoin());
  }
}

// asm.js heap store: an out-of-bounds store is simply skipped (the bounds
// check branches directly past the store to |rejoin|).
void CodeGenerator::visitAsmJSStoreHeap(LAsmJSStoreHeap* ins) {
  const MAsmJSStoreHeap* mir = ins->mir();

  const LAllocation* ptr = ins->ptr();
  const LAllocation* value = ins->value();
  const LAllocation* boundsCheckLimit = ins->boundsCheckLimit();

  Label rejoin;
  if (mir->needsBoundsCheck()) {
    masm.wasmBoundsCheck32(Assembler::AboveOrEqual, ToRegister(ptr),
                           ToRegister(boundsCheckLimit), &rejoin);
  }

  Operand dstAddr = toMemoryAccessOperand(ins, 0);
  masm.wasmStore(mir->access(), ToAnyRegister(value), dstAddr);

  if (rejoin.used()) {
    masm.bind(&rejoin);
  }
}

// Add a constant offset to a wasm pointer; carry out of the 32-bit add means
// the effective address is out of bounds, so trap via an out-of-line path.
void CodeGenerator::visitWasmAddOffset(LWasmAddOffset* lir) {
  MWasmAddOffset* mir = lir->mir();
  Register base = ToRegister(lir->base());
  Register out = ToRegister(lir->output());

  if (base != out) {
    masm.move32(base, out);
  }
  masm.add32(Imm32(mir->offset()), out);
  auto* ool = new (alloc()) LambdaOutOfLineCode([=, this](OutOfLineCode& ool) {
    masm.wasmTrap(wasm::Trap::OutOfBounds, mir->trapSiteDesc());
  });
  addOutOfLineCode(ool, mir);
  masm.j(Assembler::CarrySet, ool->entry());
}

// 64-bit variant of visitWasmAddOffset, with the same carry-checks-bounds
// trick on the 64-bit add.
void CodeGenerator::visitWasmAddOffset64(LWasmAddOffset64* lir) {
  MWasmAddOffset* mir = lir->mir();
  Register64 base = ToRegister64(lir->base());
  Register64 out = ToOutRegister64(lir);

  if (base != out) {
    masm.move64(base, out);
  }
  masm.add64(Imm64(mir->offset()), out);
  auto* ool = new (alloc()) LambdaOutOfLineCode([=, this](OutOfLineCode& ool) {
    masm.wasmTrap(wasm::Trap::OutOfBounds, mir->trapSiteDesc());
  });
  addOutOfLineCode(ool, mir);
  masm.j(Assembler::CarrySet, ool->entry());
}

// Truncate a wasm float/double to int32. Inputs that fail to convert take the
// OutOfLineWasmTruncateCheck path, which either traps or (for saturating
// truncation) materializes the saturated result and rejoins.
void CodeGenerator::visitWasmTruncateToInt32(LWasmTruncateToInt32* lir) {
  FloatRegister input = ToFloatRegister(lir->input());
  Register output = ToRegister(lir->output());

  MWasmTruncateToInt32* mir = lir->mir();
  MIRType inputType = mir->input()->type();

  MOZ_ASSERT(inputType == MIRType::Double || inputType == MIRType::Float32);

  auto* ool = new (alloc()) OutOfLineWasmTruncateCheck(mir, input, output);
  addOutOfLineCode(ool, mir);

  Label* oolEntry = ool->entry();
  if (mir->isUnsigned()) {
    if (inputType == MIRType::Double) {
      masm.wasmTruncateDoubleToUInt32(input, output, mir->isSaturating(),
                                      oolEntry);
    } else if (inputType == MIRType::Float32) {
      masm.wasmTruncateFloat32ToUInt32(input, output, mir->isSaturating(),
                                       oolEntry);
    } else {
      MOZ_CRASH("unexpected type");
    }
    // Only the saturating OOL path rejoins; the non-saturating one traps.
    if (mir->isSaturating()) {
      masm.bind(ool->rejoin());
    }
    return;
  }

  if (inputType == MIRType::Double) {
    masm.wasmTruncateDoubleToInt32(input, output, mir->isSaturating(),
                                   oolEntry);
  } else if (inputType == MIRType::Float32) {
    masm.wasmTruncateFloat32ToInt32(input, output, mir->isSaturating(),
                                    oolEntry);
  } else {
    MOZ_CRASH("unexpected type");
  }

  masm.bind(ool->rejoin());
}

bool CodeGeneratorX86Shared::generateOutOfLineCode() {
  if (!CodeGeneratorShared::generateOutOfLineCode()) {
    return false;
  }

  if (deoptLabel_.used()) {
    // All non-table-based bailouts will go here.
    masm.bind(&deoptLabel_);

    // Push the frame size, so the handler can recover the IonScript.
    masm.push(Imm32(frameSize()));

    TrampolinePtr handler = gen->jitRuntime()->getGenericBailoutHandler();
    masm.jump(handler);
  }

  return !masm.oom();
}

// Binder passed to bailout<T>(): emits a conditional jump (either to a
// hard-coded address on x86 or to a label) into the bailout path.
class BailoutJump {
  Assembler::Condition cond_;

 public:
  explicit BailoutJump(Assembler::Condition cond) : cond_(cond) {}
#ifdef JS_CODEGEN_X86
  void operator()(MacroAssembler& masm, uint8_t* code) const {
    masm.j(cond_, ImmPtr(code), RelocationKind::HARDCODED);
  }
#endif
  void operator()(MacroAssembler& masm, Label* label) const {
    masm.j(cond_, label);
  }
};

// Binder passed to bailout<T>(): retargets an already-used label at the
// bailout path instead of emitting a new jump.
class BailoutLabel {
  Label* label_;

 public:
  explicit BailoutLabel(Label* label) : label_(label) {}
#ifdef JS_CODEGEN_X86
  void operator()(MacroAssembler& masm, uint8_t* code) const {
    masm.retarget(label_, ImmPtr(code), RelocationKind::HARDCODED);
  }
#endif
  void operator()(MacroAssembler& masm, Label* label) const {
    masm.retarget(label_, label);
  }
};

template <typename T>
void CodeGeneratorX86Shared::bailout(const T& binder, LSnapshot* snapshot) {
  encode(snapshot);

  // All bailout code is associated with the bytecodeSite of the block we are
  // bailing out from.
  InlineScriptTree* tree = snapshot->mir()->block()->trackedTree();
  // The out-of-line stub pushes the snapshot offset and jumps to the shared
  // deopt label bound in generateOutOfLineCode().
  auto* ool = new (alloc()) LambdaOutOfLineCode([=, this](OutOfLineCode& ool) {
    masm.push(Imm32(snapshot->snapshotOffset()));
    masm.jmp(&deoptLabel_);
  });
  addOutOfLineCode(ool,
                   new (alloc()) BytecodeSite(tree, tree->script()->code()));

  binder(masm, ool->entry());
}

void CodeGeneratorX86Shared::bailoutIf(Assembler::Condition condition,
                                       LSnapshot* snapshot) {
  bailout(BailoutJump(condition), snapshot);
}

void CodeGeneratorX86Shared::bailoutIf(Assembler::DoubleCondition condition,
                                       LSnapshot* snapshot) {
  // Callers must use a condition that already accounts for NaN; there is no
  // separate parity dispatch on this path.
  MOZ_ASSERT(Assembler::NaNCondFromDoubleCondition(condition) ==
             Assembler::NaN_HandledByCond);
  bailoutIf(Assembler::ConditionFromDoubleCondition(condition), snapshot);
}

void CodeGeneratorX86Shared::bailoutFrom(Label* label, LSnapshot* snapshot) {
  MOZ_ASSERT_IF(!masm.oom(), label->used() && !label->bound());
  bailout(BailoutLabel(label), snapshot);
}

// Unconditional bailout.
void CodeGeneratorX86Shared::bailout(LSnapshot* snapshot) {
  Label label;
  masm.jump(&label);
  bailoutFrom(&label, snapshot);
}

void CodeGenerator::visitMinMaxD(LMinMaxD* ins) {
  FloatRegister first = ToFloatRegister(ins->first());
  FloatRegister second = ToFloatRegister(ins->second());
#ifdef DEBUG
  FloatRegister output = ToFloatRegister(ins->output());
  MOZ_ASSERT(first == output);
#endif

  // NaN handling can be skipped when range analysis proves no NaN operand.
  bool handleNaN = !ins->mir()->range() || ins->mir()->range()->canBeNaN();

  if (ins->mir()->isMax()) {
    masm.maxDouble(second, first, handleNaN);
  } else {
    masm.minDouble(second, first, handleNaN);
  }
}

void CodeGenerator::visitMinMaxF(LMinMaxF* ins) {
  FloatRegister first = ToFloatRegister(ins->first());
  FloatRegister second = ToFloatRegister(ins->second());
#ifdef DEBUG
  FloatRegister output = ToFloatRegister(ins->output());
  MOZ_ASSERT(first == output);
#endif

  // NaN handling can be skipped when range analysis proves no NaN operand.
  bool handleNaN = !ins->mir()->range() || ins->mir()->range()->canBeNaN();

  if (ins->mir()->isMax()) {
    masm.maxFloat32(second, first, handleNaN);
  } else {
    masm.minFloat32(second, first, handleNaN);
  }
}

// Math.pow(x, 0.5), specialized to sqrt with fixups for the two cases where
// sqrt and pow disagree: -Infinity and -0.
void CodeGenerator::visitPowHalfD(LPowHalfD* ins) {
  FloatRegister input = ToFloatRegister(ins->input());
  FloatRegister output = ToFloatRegister(ins->output());

  ScratchDoubleScope scratch(masm);

  Label done, sqrt;

  if (!ins->mir()->operandIsNeverNegativeInfinity()) {
    // Branch if not -Infinity.
    masm.loadConstantDouble(NegativeInfinity<double>(), scratch);

    Assembler::DoubleCondition cond = Assembler::DoubleNotEqualOrUnordered;
    if (ins->mir()->operandIsNeverNaN()) {
      cond = Assembler::DoubleNotEqual;
    }
    masm.branchDouble(cond, input, scratch, &sqrt);

    // Math.pow(-Infinity, 0.5) == Infinity.
    masm.zeroDouble(output);
    masm.subDouble(scratch, output);
    masm.jump(&done);

    masm.bind(&sqrt);
  }

  if (!ins->mir()->operandIsNeverNegativeZero()) {
    // Math.pow(-0, 0.5) == 0 == Math.pow(0, 0.5).
    // Adding 0 converts any -0 to 0.
    masm.zeroDouble(scratch);
    masm.addDouble(input, scratch);
    masm.vsqrtsd(scratch, output, output);
  } else {
    masm.vsqrtsd(input, output, output);
  }

  masm.bind(&done);
}

// Shared out-of-line tail for LAddI/LSubI overflow bailouts that must restore
// a clobbered input before taking the bailout.
void CodeGeneratorX86Shared::emitUndoALUOperationOOL(LInstruction* ins) {
  Register reg = ToRegister(ins->getDef(0));

  DebugOnly<LAllocation*> lhs = ins->getOperand(0);
  LAllocation* rhs = ins->getOperand(1);

  MOZ_ASSERT(reg == ToRegister(lhs));
  MOZ_ASSERT_IF(rhs->isGeneralReg(), reg != ToRegister(rhs));

  // Undo the effect of the ALU operation, which was performed on the output
  // register and overflowed. Writing to the output register clobbered an
  // input reg, and the original value of the input needs to be recovered
  // to satisfy the constraint imposed by any RECOVERED_INPUT operands to
  // the bailout snapshot.

  if (rhs->isConstant()) {
    Imm32 constant(ToInt32(rhs));
    if (ins->isAddI()) {
      masm.subl(constant, reg);
    } else {
      masm.addl(constant, reg);
    }
  } else {
    if (ins->isAddI()) {
      masm.subl(ToOperand(rhs), reg);
    } else {
      masm.addl(ToOperand(rhs), reg);
    }
  }

  bailout(ins->snapshot());
}

void CodeGenerator::visitAddI(LAddI* ins) {
  Register lhs = ToRegister(ins->lhs());
  const LAllocation* rhs = ins->rhs();
  Register out = ToRegister(ins->output());

  if (rhs->isConstant()) {
    if (lhs != out) {
      MOZ_ASSERT(!ins->snapshot());
      // Special case to lower the add to LEA instruction.
      masm.add32(Imm32(ToInt32(rhs)), lhs, out);
    } else {
      masm.addl(Imm32(ToInt32(rhs)), lhs);
    }
  } else {
    MOZ_ASSERT(out == lhs);
    masm.addl(ToOperand(rhs), lhs);
  }

  if (ins->snapshot()) {
    if (ins->recoversInput()) {
      // The input was clobbered by the add; undo it before bailing out.
      auto* ool = new (alloc()) LambdaOutOfLineCode(
          [=, this](OutOfLineCode& ool) { emitUndoALUOperationOOL(ins); });
      addOutOfLineCode(ool, ins->mir());
      masm.j(Assembler::Overflow, ool->entry());
    } else {
      bailoutIf(Assembler::Overflow, ins->snapshot());
    }
  }
}

void CodeGenerator::visitAddI64(LAddI64* lir) {
  Register64 lhs = ToRegister64(lir->lhs());
  LInt64Allocation rhs = lir->rhs();

  MOZ_ASSERT(ToOutRegister64(lir) == lhs);

  if (IsConstant(rhs)) {
    masm.add64(Imm64(ToInt64(rhs)), lhs);
    return;
  }

  masm.add64(ToOperandOrRegister64(rhs), lhs);
}

void CodeGenerator::visitSubI(LSubI* ins) {
  Register lhs = ToRegister(ins->lhs());
  const LAllocation* rhs = ins->rhs();

  MOZ_ASSERT(ToRegister(ins->output()) == lhs);

  if (rhs->isConstant()) {
    masm.subl(Imm32(ToInt32(rhs)), lhs);
  } else {
    masm.subl(ToOperand(rhs), lhs);
  }

  if (ins->snapshot()) {
    if (ins->recoversInput()) {
      // The input was clobbered by the sub; undo it before bailing out.
      auto* ool = new (alloc()) LambdaOutOfLineCode(
          [=, this](OutOfLineCode& ool) { emitUndoALUOperationOOL(ins); });
      addOutOfLineCode(ool, ins->mir());
      masm.j(Assembler::Overflow, ool->entry());
    } else {
      bailoutIf(Assembler::Overflow, ins->snapshot());
    }
  }
}

void CodeGenerator::visitSubI64(LSubI64* lir) {
  LInt64Allocation lhs = lir->lhs();
  LInt64Allocation rhs = lir->rhs();

  MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));

  if (IsConstant(rhs)) {
    masm.sub64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
    return;
  }

  masm.sub64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
}

void CodeGenerator::visitMulI(LMulI* ins) {
  Register lhs = ToRegister(ins->lhs());
  const LAllocation* rhs = ins->rhs();
  Register out = ToRegister(ins->output());

  MMul* mul = ins->mir();
  MOZ_ASSERT_IF(mul->mode() == MMul::Integer,
                !mul->canBeNegativeZero() && !mul->canOverflow());

  if (rhs->isConstant()) {
    // Bailout on -0.0
    int32_t constant = ToInt32(rhs);
    if (mul->canBeNegativeZero() && constant <= 0) {
      // constant == 0: result is -0 when lhs is negative (Signed);
      // constant < 0:  result is -0 when lhs is zero (Equal after test).
      Assembler::Condition bailoutCond =
          (constant == 0) ? Assembler::Signed : Assembler::Equal;
      masm.test32(lhs, lhs);
      bailoutIf(bailoutCond, ins->snapshot());
    }

    if (!mul->canOverflow()) {
      // Strength-reduce small constants to LEA/shift/add forms.
      switch (constant) {
        case 2:
          if (lhs == out) {
            masm.addl(lhs, lhs);
          } else {
            masm.leal(Operand(lhs, lhs, TimesOne), out);
          }
          return;
        case 3:
          masm.leal(Operand(lhs, lhs, TimesTwo), out);
          return;
        case 4:
          if (lhs == out) {
            masm.shll(Imm32(2), lhs);
          } else {
            masm.leal(Operand(lhs, TimesFour, 0), out);
          }
          return;
        case 5:
          masm.leal(Operand(lhs, lhs, TimesFour), out);
          return;
        case 8:
          if (lhs == out) {
            masm.shll(Imm32(3), lhs);
          } else {
            masm.leal(Operand(lhs, TimesEight, 0), out);
          }
          return;
        case 9:
          masm.leal(Operand(lhs, lhs, TimesEight), out);
          return;
        default:
          // Use shift if cannot overflow and constant is power of 2
          int32_t shift = FloorLog2(constant);
          if (constant > 0 && (1 << shift) == constant) {
            if (lhs != out) {
              masm.movl(lhs, out);
            }
            masm.shll(Imm32(shift), out);
            return;
          }
      }
    }

    switch (constant) {
      case -1:
        if (lhs != out) {
          masm.movl(lhs, out);
        }
        masm.negl(out);
        break;
      case 0:
        masm.xorl(out, out);
        return;  // escape overflow check;
      case 1:
        if (lhs != out) {
          masm.movl(lhs, out);
        }
        return;  // escape overflow check;
      case 2:
        if (lhs == out) {
          masm.addl(lhs, lhs);
          break;
        }
        [[fallthrough]];
      default:
        masm.imull(Imm32(constant), lhs, out);
    }

    // Bailout on overflow
    if (mul->canOverflow()) {
      bailoutIf(Assembler::Overflow, ins->snapshot());
    }
  } else {
    MOZ_ASSERT(out == lhs);

    masm.imull(ToOperand(rhs), lhs);

    // Bailout on overflow
    if (mul->canOverflow()) {
      bailoutIf(Assembler::Overflow, ins->snapshot());
    }

    if (mul->canBeNegativeZero()) {
      // Jump to an OOL path if the result is 0.
      auto* ool =
          new (alloc()) LambdaOutOfLineCode([=, this](OutOfLineCode& ool) {
            Register result = ToRegister(ins->output());
            Operand lhsCopy = ToOperand(ins->lhsCopy());
            Operand rhs = ToOperand(ins->rhs());
            MOZ_ASSERT_IF(lhsCopy.kind() == Operand::REG,
                          lhsCopy.reg() != result.code());

            // Result is -0 if lhs or rhs is negative.
            masm.movl(lhsCopy, result);
            masm.orl(rhs, result);
            bailoutIf(Assembler::Signed, ins->snapshot());

            masm.mov(ImmWord(0), result);
            masm.jmp(ool.rejoin());
          });
      addOutOfLineCode(ool, mul);

      masm.test32(lhs, lhs);
      masm.j(Assembler::Zero, ool->entry());
      masm.bind(ool->rejoin());
    }
  }
}

// Emit an inline zero test on |rhs| that traps with IntegerDivideByZero when
// it is zero, used by wasm div/mod with trapOnError semantics.
template <class LIR>
static void TrapIfDivideByZero(MacroAssembler& masm, LIR* lir, Register rhs) {
  auto* mir = lir->mir();
  MOZ_ASSERT(mir->trapOnError());
  MOZ_ASSERT(mir->canBeDivideByZero());

  Label nonZero;
  masm.branchTest32(Assembler::NonZero, rhs, rhs, &nonZero);
  masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->trapSiteDesc());
  masm.bind(&nonZero);
}

OutOfLineCode* CodeGeneratorX86Shared::emitOutOfLineZeroForDivideByZero(
    Register rhs, Register output) {
  // Truncated division by zero is zero: (±Infinity|0 == 0) and (NaN|0 == 0).
  // OOL path: materialize 0 in the output and rejoin after the division.
  // The caller is responsible for addOutOfLineCode() and binding rejoin().
  auto* ool = new (alloc()) LambdaOutOfLineCode([=, this](OutOfLineCode& ool) {
    masm.mov(ImmWord(0), output);
    masm.jmp(ool.rejoin());
  });
  masm.branchTest32(Assembler::Zero, rhs, rhs, ool->entry());

  return ool;
}

// Unsigned 32-bit division; x86 udiv requires the fixed edx:eax register
// pair, with the quotient in eax and the remainder in edx.
void CodeGenerator::visitUDiv(LUDiv* ins) {
  Register rhs = ToRegister(ins->rhs());
  Register output = ToRegister(ins->output());
  Register remainder = ToRegister(ins->temp0());

  MOZ_ASSERT(ToRegister(ins->lhs()) == eax);
  MOZ_ASSERT(rhs != eax);
  MOZ_ASSERT(rhs != edx);
  MOZ_ASSERT(output == eax);
  MOZ_ASSERT(remainder == edx);

  MDiv* mir = ins->mir();

  OutOfLineCode* ool = nullptr;

  // Prevent divide by zero.
  if (mir->canBeDivideByZero()) {
    if (mir->trapOnError()) {
      TrapIfDivideByZero(masm, ins, rhs);
    } else if (mir->isTruncated()) {
      ool = emitOutOfLineZeroForDivideByZero(rhs, output);
    } else {
      MOZ_ASSERT(mir->fallible());
      bailoutTest32(Assembler::Zero, rhs, rhs, ins->snapshot());
    }
  }

  // Zero extend the lhs into edx to make (edx:eax), since udiv is 64-bit.
  masm.mov(ImmWord(0), edx);
  masm.udiv(rhs);

  // If the remainder is > 0, bailout since this must be a double.
  if (!mir->canTruncateRemainder()) {
    bailoutTest32(Assembler::NonZero, remainder, remainder, ins->snapshot());
  }

  // Unsigned div can return a value that's not a signed int32.
  // If our users aren't expecting that, bail.
  if (!mir->isTruncated()) {
    bailoutTest32(Assembler::Signed, output, output, ins->snapshot());
  }

  if (ool) {
    addOutOfLineCode(ool, mir);
    masm.bind(ool->rejoin());
  }
}

// Unsigned 32-bit modulus; same fixed edx:eax udiv contract as visitUDiv,
// but here the remainder (edx) is the result.
void CodeGenerator::visitUMod(LUMod* ins) {
  Register rhs = ToRegister(ins->rhs());
  Register output = ToRegister(ins->output());

  MOZ_ASSERT(ToRegister(ins->lhs()) == eax);
  MOZ_ASSERT(rhs != eax);
  MOZ_ASSERT(rhs != edx);
  MOZ_ASSERT(output == edx);
  MOZ_ASSERT(ToRegister(ins->temp0()) == eax);

  MMod* mir = ins->mir();

  OutOfLineCode* ool = nullptr;

  // Prevent divide by zero.
  if (mir->canBeDivideByZero()) {
    if (mir->trapOnError()) {
      TrapIfDivideByZero(masm, ins, rhs);
    } else if (mir->isTruncated()) {
      ool = emitOutOfLineZeroForDivideByZero(rhs, output);
    } else {
      MOZ_ASSERT(mir->fallible());
      bailoutTest32(Assembler::Zero, rhs, rhs, ins->snapshot());
    }
  }

  // Zero extend the lhs into edx to make (edx:eax), since udiv is 64-bit.
  masm.mov(ImmWord(0), edx);
  masm.udiv(rhs);

  // Unsigned mod can return a value that's not a signed int32.
  // If our users aren't expecting that, bail.
  if (!mir->isTruncated()) {
    bailoutTest32(Assembler::Signed, output, output, ins->snapshot());
  }

  if (ool) {
    addOutOfLineCode(ool, mir);
    masm.bind(ool->rejoin());
  }
}

// Compute the truncated unsigned quotient lhs/d into |result| using a
// reciprocal multiplication (multiply-high plus shift) instead of a div.
template <class LUDivOrUMod>
static void UnsignedDivideWithConstant(MacroAssembler& masm, LUDivOrUMod* ins,
                                       Register result, Register temp) {
  Register lhs = ToRegister(ins->numerator());
  uint32_t d = ins->denominator();

  MOZ_ASSERT(lhs != result && lhs != temp);
#ifdef JS_CODEGEN_X86
  MOZ_ASSERT(result == edx && temp == eax);
#else
  MOZ_ASSERT(result != temp);
#endif

  // The denominator isn't a power of 2 (see LDivPowTwoI and LModPowTwoI).
  MOZ_ASSERT(!mozilla::IsPowerOfTwo(d));

  auto rmc = ReciprocalMulConstants::computeUnsignedDivisionConstants(d);

  // We first compute (M * n) >> 32, where M = rmc.multiplier.
#ifdef JS_CODEGEN_X86
  masm.movl(Imm32(rmc.multiplier), eax);
  masm.umull(lhs);
#else
  // Zero-extend |lhs| in preparation for a 64-bit multiplication.
  masm.movl(lhs, result);

  // Note that imul sign-extends its 32-bit immediate, but we need an unsigned
  // multiplication.
  if (int32_t(rmc.multiplier) >= 0) {
    masm.imulq(Imm32(rmc.multiplier), result, result);
  } else {
    masm.movl(Imm32(rmc.multiplier), temp);
    masm.imulq(temp, result);
  }
  if (rmc.multiplier > UINT32_MAX || rmc.shiftAmount == 0) {
    masm.shrq(Imm32(32), result);
  }
#endif
  if (rmc.multiplier > UINT32_MAX) {
    // M >= 2^32 and shift == 0 is impossible, as d >= 2 implies that
    // ((M * n) >> (32 + shift)) >= n > floor(n/d) whenever n >= d,
    // contradicting the proof of correctness in computeDivisionConstants.
    MOZ_ASSERT(rmc.shiftAmount > 0);
    MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 33));

    // We actually computed result = ((uint32_t(M) * n) >> 32) instead. Since
    // (M * n) >> (32 + shift) is the same as (result + n) >> shift, we can
    // correct for the overflow. This case is a bit trickier than the signed
    // case, though, as the (result + n) addition itself can overflow; however,
    // note that
    // (result + n) >> shift == (((n - result) >> 1) + result) >> (shift - 1),
    // which is overflow-free. See Hacker's Delight, section 10-8 for details.

    // Compute (n - result) >> 1 into temp.
    masm.movl(lhs, temp);
    masm.subl(result, temp);
    masm.shrl(Imm32(1), temp);

    // Finish the computation.
    // result = (((n - result) >> 1) + result) >> (shift - 1), the
    // overflow-free rewrite of (result + n) >> shift started above.
    masm.addl(temp, result);
    if (rmc.shiftAmount > 1) {
      masm.shrl(Imm32(rmc.shiftAmount - 1), result);
    }
  } else {
    if (rmc.shiftAmount > 0) {
#ifdef JS_CODEGEN_X86
      // The high half is already in |result| (edx); just shift it.
      masm.shrl(Imm32(rmc.shiftAmount), result);
#else
      // Extract the high half and apply the shift in one 64-bit operation.
      masm.shrq(Imm32(32 + rmc.shiftAmount), result);
#endif
    }
  }
}

// Lowering for unsigned division by a known constant that is neither zero
// (handled inline) nor a power of two (handled by LDivPowTwoI).
void CodeGenerator::visitUDivConstant(LUDivConstant* ins) {
  Register lhs = ToRegister(ins->numerator());
  Register output = ToRegister(ins->output());
  Register temp = ToRegister(ins->temp0());
  uint32_t d = ins->denominator();

  MDiv* mir = ins->mir();

#ifdef JS_CODEGEN_X86
  // This emits the division answer into edx.
  MOZ_ASSERT(output == edx);
  MOZ_ASSERT(temp == eax);
#endif

  // Division by zero: trap (wasm), fold to 0 (truncated), or bail out.
  if (d == 0) {
    if (mir->trapOnError()) {
      masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->trapSiteDesc());
    } else if (mir->isTruncated()) {
      masm.xorl(output, output);
    } else {
      bailout(ins->snapshot());
    }
    return;
  }

  // Compute the truncated division result in |output|.
  UnsignedDivideWithConstant(masm, ins, output, temp);

  // If the untruncated result may be observed, verify the division was exact
  // by multiplying back; otherwise the true result is a non-integer double.
  if (!mir->isTruncated()) {
    masm.imull(Imm32(d), output, temp);
    bailoutCmp32(Assembler::NotEqual, lhs, temp, ins->snapshot());
  }
}

// Lowering for unsigned modulus by a known constant (same preconditions and
// register conventions as visitUDivConstant, but output/temp roles swap).
void CodeGenerator::visitUModConstant(LUModConstant* ins) {
  Register lhs = ToRegister(ins->numerator());
  Register output = ToRegister(ins->output());
  Register temp = ToRegister(ins->temp0());
  uint32_t d = ins->denominator();

  MMod* mir = ins->mir();

#ifdef JS_CODEGEN_X86
  // This emits the modulus answer into eax.
  MOZ_ASSERT(output == eax);
  MOZ_ASSERT(temp == edx);
#endif

  // Modulus by zero: trap (wasm), fold to 0 (truncated), or bail out.
  if (d == 0) {
    if (mir->trapOnError()) {
      masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->trapSiteDesc());
    } else if (mir->isTruncated()) {
      masm.xorl(output, output);
    } else {
      bailout(ins->snapshot());
    }
    return;
  }

  // Compute the truncated division result in |temp|.
  UnsignedDivideWithConstant(masm, ins, temp, output);

  // We now have the truncated division value in |temp|. If we're computing a
  // modulus or checking whether the division resulted in an integer, we need
  // to multiply the obtained value by d and finish the computation/check.
  //
  // output = lhs - d * temp
  masm.imull(Imm32(d), temp, temp);
  masm.movl(lhs, output);
  masm.subl(temp, output);

  // The final result of the modulus op, just computed above by the
  // sub instruction, can be a number in the range [2^31, 2^32). If
  // this is the case and the modulus is not truncated, we must bail
  // out.
  if (!mir->isTruncated()) {
    bailoutIf(Assembler::Signed, ins->snapshot());
  }
}

// Lowering for division by a power-of-two constant, emitted as shifts. The
// magnitude is 2^shift(); the divisor's sign is carried by negativeDivisor().
void CodeGenerator::visitDivPowTwoI(LDivPowTwoI* ins) {
  Register lhs = ToRegister(ins->numerator());
  DebugOnly<Register> output = ToRegister(ins->output());

  int32_t shift = ins->shift();
  bool negativeDivisor = ins->negativeDivisor();
  MDiv* mir = ins->mir();

  // We use defineReuseInput so these should always be the same, which is
  // convenient since all of our instructions here are two-address.
  MOZ_ASSERT(lhs == output);

  if (!mir->isTruncated() && negativeDivisor) {
    // 0 divided by a negative number must return a double.
    bailoutTest32(Assembler::Zero, lhs, lhs, ins->snapshot());
  }

  if (shift) {
    if (!mir->isTruncated()) {
      // If the remainder is != 0, bailout since this must be a double.
      // Test the low |shift| bits of the numerator (the remainder mask).
      bailoutTest32(Assembler::NonZero, lhs, Imm32(UINT32_MAX >> (32 - shift)),
                    ins->snapshot());
    }

    if (mir->isUnsigned()) {
      masm.shrl(Imm32(shift), lhs);
    } else {
      // Adjust the value so that shifting produces a correctly
      // rounded result when the numerator is negative. See 10-1
      // "Signed Division by a Known Power of 2" in Henry
      // S. Warren, Jr.'s Hacker's Delight.
      if (mir->canBeNegativeDividend() && mir->isTruncated()) {
        // Note: There is no need to execute this code, which handles how to
        // round the signed integer division towards 0, if we previously bailed
        // due to a non-zero remainder.
        Register lhsCopy = ToRegister(ins->numeratorCopy());
        MOZ_ASSERT(lhsCopy != lhs);
        if (shift > 1) {
          // Copy the sign bit of the numerator. (= (2^32 - 1) or 0)
          masm.sarl(Imm32(31), lhs);
        }
        // Divide by 2^(32 - shift)
        // i.e. (= (2^32 - 1) / 2^(32 - shift) or 0)
        // i.e. (= (2^shift - 1) or 0)
        masm.shrl(Imm32(32 - shift), lhs);
        // If signed, make any 1 bit below the shifted bits to bubble up, such
        // that once shifted the value would be rounded towards 0.
        masm.addl(lhsCopy, lhs);
      }
      masm.sarl(Imm32(shift), lhs);

      if (negativeDivisor) {
        masm.negl(lhs);
      }
    }
    return;
  }

  // shift == 0: the divisor's magnitude is 1.
  if (negativeDivisor) {
    // INT32_MIN / -1 overflows.
    masm.negl(lhs);
    if (!mir->isTruncated()) {
      bailoutIf(Assembler::Overflow, ins->snapshot());
    } else if (mir->trapOnError()) {
      Label ok;
      masm.j(Assembler::NoOverflow, &ok);
      masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->trapSiteDesc());
      masm.bind(&ok);
    }
  } else if (mir->isUnsigned() && !mir->isTruncated()) {
    // Unsigned division by 1 can overflow if output is not truncated.
    // An unsigned value >= 2^31 is not a valid int32; bail out.
    bailoutTest32(Assembler::Signed, lhs, lhs, ins->snapshot());
  }
}

// Emits |result| = numerator / denominator (signed) using a reciprocal
// multiplication. Callers ensure d != 0; |Abs(d)| is not a power of two.
template <class LDivOrMod>
static void DivideWithConstant(MacroAssembler& masm, LDivOrMod* ins,
                               Register result, Register temp) {
  Register lhs = ToRegister(ins->numerator());
  int32_t d = ins->denominator();

  MOZ_ASSERT(lhs != result && lhs != temp);
#ifdef JS_CODEGEN_X86
  // The widening imull below fixes eax/edx.
  MOZ_ASSERT(result == edx && temp == eax);
#else
  MOZ_ASSERT(result != temp);
#endif

  // The absolute value of the denominator isn't a power of 2 (see LDivPowTwoI
  // and LModPowTwoI).
  MOZ_ASSERT(!mozilla::IsPowerOfTwo(mozilla::Abs(d)));

  auto* mir = ins->mir();

  // We will first divide by Abs(d), and negate the answer if d is negative.
  // If desired, this can be avoided by generalizing computeDivisionConstants.
  auto rmc = ReciprocalMulConstants::computeSignedDivisionConstants(d);

  // We first compute (M * n) >> 32, where M = rmc.multiplier.
#ifdef JS_CODEGEN_X86
  masm.movl(Imm32(rmc.multiplier), eax);
  masm.imull(lhs);
#else
  // Sign-extend |lhs| in preparation for a 64-bit multiplication.
  masm.movslq(lhs, result);
  masm.imulq(Imm32(rmc.multiplier), result, result);
  // Extract the high 32 bits now unless the final shift below can fold the
  // extraction into a single 64-bit arithmetic shift.
  if (rmc.multiplier > INT32_MAX || rmc.shiftAmount == 0) {
    masm.shrq(Imm32(32), result);
  }
#endif
  if (rmc.multiplier > INT32_MAX) {
    MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 32));

    // We actually computed result = ((int32_t(M) * n) >> 32) instead. Since
    // (M * n) >> 32 is the same as (result + n), we can correct for the
    // overflow. (result + n) can't overflow, as n and |result| have opposite
    // signs because int32_t(M) is negative.
    masm.addl(lhs, result);
  }
  // (M * n) >> (32 + shift) is the truncated division answer if n is
  // non-negative, as proved in the comments of computeDivisionConstants. We
  // must add 1 later if n is negative to get the right answer in all cases.
  if (rmc.shiftAmount > 0) {
#ifdef JS_CODEGEN_X86
    masm.sarl(Imm32(rmc.shiftAmount), result);
#else
    if (rmc.multiplier > INT32_MAX) {
      // The high half was already extracted (shrq above): 32-bit shift.
      masm.sarl(Imm32(rmc.shiftAmount), result);
    } else {
      // Extract the high half and apply the shift in one 64-bit operation.
      masm.sarq(Imm32(32 + rmc.shiftAmount), result);
    }
#endif
  }

  // We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be
  // computed with just a sign-extending shift of 31 bits.
  if (mir->canBeNegativeDividend()) {
    masm.movl(lhs, temp);
    masm.sarl(Imm32(31), temp);
    masm.subl(temp, result);
  }

  // After this, |result| contains the correct truncated division result.
  if (d < 0) {
    masm.negl(result);
  }
}

// Lowering for signed division by a known constant; d == 0 is handled inline,
// and |Abs(d)| is not a power of two (see LDivPowTwoI).
void CodeGenerator::visitDivConstantI(LDivConstantI* ins) {
  Register lhs = ToRegister(ins->numerator());
  Register output = ToRegister(ins->output());
  Register temp = ToRegister(ins->temp0());
  int32_t d = ins->denominator();

  MDiv* mir = ins->mir();

#ifdef JS_CODEGEN_X86
  // This emits the division answer into edx.
  MOZ_ASSERT(output == edx);
  MOZ_ASSERT(temp == eax);
#endif

  // Division by zero: trap (wasm), fold to 0 (truncated), or bail out.
  if (d == 0) {
    if (mir->trapOnError()) {
      masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->trapSiteDesc());
    } else if (mir->isTruncated()) {
      masm.xorl(output, output);
    } else {
      bailout(ins->snapshot());
    }
    return;
  }

  // Compute the truncated division result in |output|.
  DivideWithConstant(masm, ins, output, temp);

  if (!mir->isTruncated()) {
    // This is a division op. Multiply the obtained value by d to check if
    // the correct answer is an integer. This cannot overflow, since |d| > 1.
    masm.imull(Imm32(d), output, temp);
    bailoutCmp32(Assembler::NotEqual, lhs, temp, ins->snapshot());

    // If lhs is zero and the divisor is negative, the answer should have
    // been -0.
    if (d < 0) {
      bailoutTest32(Assembler::Zero, lhs, lhs, ins->snapshot());
    }
  }
}

// Lowering for signed modulus by a known constant, derived from the truncated
// quotient: output = lhs - d * (lhs / d).
void CodeGenerator::visitModConstantI(LModConstantI* ins) {
  Register lhs = ToRegister(ins->numerator());
  Register output = ToRegister(ins->output());
  Register temp = ToRegister(ins->temp0());
  int32_t d = ins->denominator();

  MMod* mir = ins->mir();

#ifdef JS_CODEGEN_X86
  // This emits the modulus answer into eax.
  MOZ_ASSERT(output == eax);
  MOZ_ASSERT(temp == edx);
#endif

  // Modulus by zero: trap (wasm), fold to 0 (truncated), or bail out.
  if (d == 0) {
    if (mir->trapOnError()) {
      masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->trapSiteDesc());
    } else if (mir->isTruncated()) {
      masm.xorl(output, output);
    } else {
      bailout(ins->snapshot());
    }
    return;
  }

  // Compute the truncated division result in |temp|.
  DivideWithConstant(masm, ins, temp, output);

  // Compute the remainder in |output|: output = lhs - d * temp, written as
  // lhs + (-d) * temp so the three-operand multiply can target |output|.
  masm.imull(Imm32(-d), temp, output);
  masm.addl(lhs, output);

  if (!mir->isTruncated() && mir->canBeNegativeDividend()) {
    // This is a mod op. If the computed value is zero and lhs
    // is negative, the answer should have been -0.
    Label done;
    masm.branch32(Assembler::GreaterThanOrEqual, lhs, Imm32(0), &done);
    bailoutTest32(Assembler::Zero, output, output, ins->snapshot());
    masm.bind(&done);
  }
}

// Lowering for signed 32-bit division with a non-constant divisor, via the
// x86 idiv instruction (lhs in eax; quotient in eax, remainder in edx).
void CodeGenerator::visitDivI(LDivI* ins) {
  Register remainder = ToRegister(ins->temp0());
  Register lhs = ToRegister(ins->lhs());
  Register rhs = ToRegister(ins->rhs());
  Register output = ToRegister(ins->output());

  // Register constraints imposed by idiv.
  MOZ_ASSERT(lhs == eax);
  MOZ_ASSERT(rhs != eax);
  MOZ_ASSERT(rhs != edx);
  MOZ_ASSERT(remainder == edx);
  MOZ_ASSERT(output == eax);

  MDiv* mir = ins->mir();

  Label done;
  OutOfLineCode* ool = nullptr;

  // Handle divide by zero.
  if (mir->canBeDivideByZero()) {
    if (mir->trapOnError()) {
      TrapIfDivideByZero(masm, ins, rhs);
    } else if (mir->canTruncateInfinities()) {
      ool = emitOutOfLineZeroForDivideByZero(rhs, output);
    } else {
      MOZ_ASSERT(mir->fallible());
      bailoutTest32(Assembler::Zero, rhs, rhs, ins->snapshot());
    }
  }

  // Handle an integer overflow exception from -2147483648 / -1.
  if (mir->canBeNegativeOverflow()) {
    Label notOverflow;
    masm.branch32(Assembler::NotEqual, lhs, Imm32(INT32_MIN), &notOverflow);
    if (mir->trapOnError()) {
      masm.branch32(Assembler::NotEqual, rhs, Imm32(-1), &notOverflow);
      masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->trapSiteDesc());
    } else if (mir->canTruncateOverflow()) {
      // (-INT32_MIN)|0 == INT32_MIN and INT32_MIN is already in the
      // output register (lhs == eax).
      masm.branch32(Assembler::Equal, rhs, Imm32(-1), &done);
    } else {
      MOZ_ASSERT(mir->fallible());
      bailoutCmp32(Assembler::Equal, rhs, Imm32(-1), ins->snapshot());
    }
    masm.bind(&notOverflow);
  }

  // Handle negative 0.
  // 0 / negative yields -0, which is not an int32; bail out in that case.
  if (!mir->canTruncateNegativeZero() && mir->canBeNegativeZero()) {
    Label nonzero;
    masm.branchTest32(Assembler::NonZero, lhs, lhs, &nonzero);
    bailoutCmp32(Assembler::LessThan, rhs, Imm32(0), ins->snapshot());
    masm.bind(&nonzero);
  }

  // Sign extend the lhs into edx to make (edx:eax), since idiv is 64-bit.
  masm.cdq();
  masm.idiv(rhs);

  if (!mir->canTruncateRemainder()) {
    // If the remainder is > 0, bailout since this must be a double.
    bailoutTest32(Assembler::NonZero, remainder, remainder, ins->snapshot());
  }

  masm.bind(&done);

  if (ool) {
    addOutOfLineCode(ool, mir);
    masm.bind(ool->rejoin());
  }
}

// Lowering for modulus by a power-of-two constant, computed with a bitmask
// (wrapped in negations for negative dividends).
void CodeGenerator::visitModPowTwoI(LModPowTwoI* ins) {
  Register lhs = ToRegister(ins->input());
  int32_t shift = ins->shift();
  bool canBeNegative =
      !ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend();

  // Divisor magnitude is 1: the remainder is always 0, but a negative
  // dividend means the result would be -0 when not truncated.
  if (shift == 0) {
    if (canBeNegative && !ins->mir()->isTruncated()) {
      bailoutTest32(Assembler::Signed, lhs, lhs, ins->snapshot());
    }
    masm.xorl(lhs, lhs);
    return;
  }

  // Keep only the low |shift| bits, using a zero-extending move where one
  // exists for the width, otherwise an explicit mask.
  auto clearHighBits = [&]() {
    switch (shift) {
      case 16:
        masm.movzwl(lhs, lhs);
        break;
      case 8:
        if (AllocatableGeneralRegisterSet(Registers::SingleByteRegs).has(lhs)) {
          masm.movzbl(lhs, lhs);
          break;
        }
        [[fallthrough]];
      default:
        masm.andl(Imm32((uint32_t(1) << shift) - 1), lhs);
        break;
    }
  };

  Label negative;

  if (canBeNegative) {
    // Switch based on sign of the lhs.
    // Positive numbers are just a bitmask
    masm.branchTest32(Assembler::Signed, lhs, lhs, &negative);
  }

  clearHighBits();

  if (canBeNegative) {
    Label done;
    masm.jump(&done);

    // Negative numbers need a negate, bitmask, negate
    masm.bind(&negative);

    // Unlike in the visitModI case, we are not computing the mod by means of a
    // division. Therefore, the divisor = -1 case isn't problematic (the andl
    // always returns 0, which is what we expect).
    //
    // The negl instruction overflows if lhs == INT32_MIN, but this is also not
    // a problem: shift is at most 31, and so the andl also always returns 0.
    masm.negl(lhs);
    clearHighBits();
    masm.negl(lhs);

    // Since a%b has the same sign as b, and a is negative in this branch,
    // an answer of 0 means the correct result is actually -0. Bail out.
    if (!ins->mir()->isTruncated()) {
      bailoutIf(Assembler::Zero, ins->snapshot());
    }
    masm.bind(&done);
  }
}

// Out-of-line path for visitModI guarding INT32_MIN % -1, which would raise a
// hardware exception inside idiv.
class ModOverflowCheck : public OutOfLineCodeBase<CodeGeneratorX86Shared> {
  Label done_;
  LModI* ins_;
  Register rhs_;

 public:
  explicit ModOverflowCheck(LModI* ins, Register rhs) : ins_(ins), rhs_(rhs) {}

  virtual void accept(CodeGeneratorX86Shared* codegen) override {
    codegen->visitModOverflowCheck(this);
  }
  Label* done() { return &done_; }
  LModI* ins() const { return ins_; }
  Register rhs() const { return rhs_; }
};

// Reached when lhs == INT32_MIN. If rhs == -1 the remainder is 0 (materialized
// in edx when truncated, otherwise a bailout since the result should be -0);
// for any other rhs, idiv is safe and we rejoin.
void CodeGeneratorX86Shared::visitModOverflowCheck(ModOverflowCheck* ool) {
  masm.cmp32(ool->rhs(), Imm32(-1));
  if (ool->ins()->mir()->isTruncated()) {
    masm.j(Assembler::NotEqual, ool->rejoin());
    masm.mov(ImmWord(0), edx);
    masm.jmp(ool->done());
  } else {
    bailoutIf(Assembler::Equal, ool->ins()->snapshot());
    masm.jmp(ool->rejoin());
  }
}

// Lowering for signed 32-bit modulus with a non-constant divisor, via idiv
// (remainder in edx).
void CodeGenerator::visitModI(LModI* ins) {
  Register remainder = ToRegister(ins->output());
  Register lhs = ToRegister(ins->lhs());
  Register rhs = ToRegister(ins->rhs());

  // Required to use idiv.
  MOZ_ASSERT(lhs == eax);
  MOZ_ASSERT(rhs != eax);
  MOZ_ASSERT(rhs != edx);
  MOZ_ASSERT(remainder == edx);
  MOZ_ASSERT(ToRegister(ins->temp0()) == eax);

  MMod* mir = ins->mir();

  Label done;
  OutOfLineCode* ool = nullptr;
  ModOverflowCheck* overflow = nullptr;

  // Prevent divide by zero.
  if (mir->canBeDivideByZero()) {
    if (mir->trapOnError()) {
      TrapIfDivideByZero(masm, ins, rhs);
    } else if (mir->isTruncated()) {
      ool = emitOutOfLineZeroForDivideByZero(rhs, remainder);
    } else {
      MOZ_ASSERT(mir->fallible());
      bailoutTest32(Assembler::Zero, rhs, rhs, ins->snapshot());
    }
  }

  Label negative;

  // Switch based on sign of the lhs.
  if (mir->canBeNegativeDividend()) {
    masm.branchTest32(Assembler::Signed, lhs, lhs, &negative);
  }

  // If lhs >= 0 then remainder = lhs % rhs. The remainder must be positive.
  {
    // Check if rhs is a power-of-two.
    if (mir->canBePowerOfTwoDivisor()) {
      MOZ_ASSERT(rhs != remainder);

      // Rhs y is a power-of-two if (y & (y-1)) == 0. Note that if
      // y is any negative number other than INT32_MIN, both y and
      // y-1 will have the sign bit set so these are never optimized
      // as powers-of-two. If y is INT32_MIN, y-1 will be INT32_MAX
      // and because lhs >= 0 at this point, lhs & INT32_MAX returns
      // the correct value.
      Label notPowerOfTwo;
      masm.mov(rhs, remainder);
      masm.subl(Imm32(1), remainder);
      masm.branchTest32(Assembler::NonZero, remainder, rhs, &notPowerOfTwo);
      {
        // rhs is a power of two: the remainder is simply lhs & (rhs - 1).
        masm.andl(lhs, remainder);
        masm.jmp(&done);
      }
      masm.bind(&notPowerOfTwo);
    }

    // Since lhs >= 0, the sign-extension will be 0
    masm.mov(ImmWord(0), edx);
    masm.idiv(rhs);
  }

  // Otherwise, we have to beware of two special cases:
  if (mir->canBeNegativeDividend()) {
    masm.jump(&done);

    masm.bind(&negative);

    // Prevent an integer overflow exception from -2147483648 % -1
    overflow = new (alloc()) ModOverflowCheck(ins, rhs);
    masm.branch32(Assembler::Equal, lhs, Imm32(INT32_MIN), overflow->entry());
    masm.bind(overflow->rejoin());

    masm.cdq();
    masm.idiv(rhs);

    if (!mir->isTruncated()) {
      // A remainder of 0 means that the rval must be -0, which is a double.
      bailoutTest32(Assembler::Zero, remainder, remainder, ins->snapshot());
    }
  }

  masm.bind(&done);

  if (overflow) {
    addOutOfLineCode(overflow, mir);
    masm.bind(overflow->done());
  }

  if (ool) {
    addOutOfLineCode(ool, mir);
    masm.bind(ool->rejoin());
  }
}

// Lowering for 32-bit bitwise NOT; operates in place on the input register.
void CodeGenerator::visitBitNotI(LBitNotI* ins) {
  Register input = ToRegister(ins->input());
  MOZ_ASSERT(input == ToRegister(ins->output()));

  masm.notl(input);
}

// Lowering for 32-bit bitwise OR/XOR/AND. Two-address form: the lhs doubles
// as the output; the rhs may be a constant or a register/memory operand.
void CodeGenerator::visitBitOpI(LBitOpI* ins) {
  Register lhs = ToRegister(ins->lhs());
  const LAllocation* rhs = ins->rhs();

  MOZ_ASSERT(lhs == ToRegister(ins->output()));

  switch (ins->bitop()) {
    case JSOp::BitOr:
      if (rhs->isConstant()) {
        masm.orl(Imm32(ToInt32(rhs)), lhs);
      } else {
        masm.orl(ToOperand(rhs), lhs);
      }
      break;
    case JSOp::BitXor:
      if (rhs->isConstant()) {
        masm.xorl(Imm32(ToInt32(rhs)), lhs);
      } else {
        masm.xorl(ToOperand(rhs), lhs);
      }
      break;
    case JSOp::BitAnd:
      if (rhs->isConstant()) {
        masm.andl(Imm32(ToInt32(rhs)), lhs);
      } else {
        masm.andl(ToOperand(rhs), lhs);
      }
      break;
    default:
      MOZ_CRASH("unexpected binary opcode");
  }
}

// Lowering for 64-bit bitwise OR/XOR/AND; same two-address shape as the
// 32-bit version above.
void CodeGenerator::visitBitOpI64(LBitOpI64* lir) {
  Register64 lhs = ToRegister64(lir->lhs());
  LInt64Allocation rhs = lir->rhs();

  MOZ_ASSERT(ToOutRegister64(lir) == lhs);

  switch (lir->bitop()) {
    case JSOp::BitOr:
      if (IsConstant(rhs)) {
        masm.or64(Imm64(ToInt64(rhs)), lhs);
      } else {
        masm.or64(ToOperandOrRegister64(rhs), lhs);
      }
      break;
    case JSOp::BitXor:
      if (IsConstant(rhs)) {
        masm.xor64(Imm64(ToInt64(rhs)), lhs);
      } else {
        masm.xor64(ToOperandOrRegister64(rhs), lhs);
      }
      break;
    case JSOp::BitAnd:
      if (IsConstant(rhs)) {
        masm.and64(Imm64(ToInt64(rhs)), lhs);
      } else {
        masm.and64(ToOperandOrRegister64(rhs), lhs);
      }
      break;
    default:
      MOZ_CRASH("unexpected binary opcode");
  }
}

// Lowering for 32-bit shifts (<<, >>, >>>). With BMI2 available, the
// three-operand shlx/sarx/shrx forms permit out != lhs.
void CodeGenerator::visitShiftI(LShiftI* ins) {
  Register lhs = ToRegister(ins->lhs());
  const LAllocation* rhs = ins->rhs();
  Register out = ToRegister(ins->output());

  if (rhs->isConstant()) {
    MOZ_ASSERT(out == lhs);

    // Shift counts are taken mod 32, matching JS shift semantics.
    int32_t shift = ToInt32(rhs) & 0x1F;
    switch (ins->bitop()) {
      case JSOp::Lsh:
        if (shift) {
          masm.lshift32(Imm32(shift), lhs);
        }
        break;
      case JSOp::Rsh:
        if (shift) {
          masm.rshift32Arithmetic(Imm32(shift), lhs);
        }
        break;
      case JSOp::Ursh:
        if (shift) {
          masm.rshift32(Imm32(shift), lhs);
        } else if (ins->mir()->toUrsh()->fallible()) {
          // x >>> 0 can overflow.
          masm.test32(lhs, lhs);
          bailoutIf(Assembler::Signed, ins->snapshot());
        }
        break;
      default:
        MOZ_CRASH("Unexpected shift op");
    }
  } else {
    Register shift = ToRegister(rhs);
    // A distinct output register is only possible with BMI2's three-operand
    // shift instructions.
    MOZ_ASSERT_IF(out != lhs, Assembler::HasBMI2());

    switch (ins->bitop()) {
      case JSOp::Lsh:
        if (out != lhs) {
          masm.shlxl(lhs, shift, out);
        } else {
          masm.lshift32(shift, lhs);
        }
        break;
      case JSOp::Rsh:
        if (out != lhs) {
          masm.sarxl(lhs, shift, out);
        } else {
          masm.rshift32Arithmetic(shift, lhs);
        }
        break;
      case JSOp::Ursh:
        if (out != lhs) {
          masm.shrxl(lhs, shift, out);
        } else {
          masm.rshift32(shift, lhs);
        }
        if (ins->mir()->toUrsh()->fallible()) {
          // x >>> 0 can overflow.
          masm.test32(out, out);
          bailoutIf(Assembler::Signed, ins->snapshot());
        }
        break;
      default:
        MOZ_CRASH("Unexpected shift op");
    }
  }
}

// Lowering for >>> producing a double: shift in an integer register, then
// convert unsigned -> double, so results >= 2^31 need no bailout.
void CodeGenerator::visitUrshD(LUrshD* ins) {
  Register lhs = ToRegister(ins->lhs());
  const LAllocation* rhs = ins->rhs();
  FloatRegister out = ToFloatRegister(ins->output());
  Register temp = ToRegister(ins->temp0());

  if (rhs->isConstant()) {
    MOZ_ASSERT(temp == lhs);

    // Shift counts are taken mod 32, matching JS shift semantics.
    int32_t shift = ToInt32(rhs) & 0x1F;
    if (shift) {
      masm.shrl(Imm32(shift), lhs);
    }
  } else {
    // A distinct temp register is only possible with BMI2's shrx.
    MOZ_ASSERT_IF(temp != lhs, Assembler::HasBMI2());

    Register shift = ToRegister(rhs);
    if (temp != lhs) {
      masm.shrxl(lhs, shift, temp);
    } else {
      masm.rshift32(shift, lhs);
    }
  }

  masm.convertUInt32ToDouble(temp, out);
}

// Converts an LAllocation (general register, float register, or stack slot)
// into an x86 Operand.
Operand CodeGeneratorX86Shared::ToOperand(const LAllocation& a) {
  if (a.isGeneralReg()) {
    return Operand(a.toGeneralReg()->reg());
  }
  if (a.isFloatReg()) {
    return Operand(a.toFloatReg()->reg());
  }
  return Operand(ToAddress(a));
}

Operand CodeGeneratorX86Shared::ToOperand(const LAllocation* a) {
  return ToOperand(*a);
}

// Converts an LAllocation into a MoveOperand for the move resolver. Stack
// areas are passed as effective addresses rather than memory loads.
MoveOperand CodeGeneratorX86Shared::toMoveOperand(LAllocation a) const {
  if (a.isGeneralReg()) {
    return MoveOperand(ToRegister(a));
  }
  if (a.isFloatReg()) {
    return MoveOperand(ToFloatRegister(a));
  }
  MoveOperand::Kind kind = a.isStackArea() ? MoveOperand::Kind::EffectiveAddress
                                           : MoveOperand::Kind::Memory;
  return MoveOperand(ToAddress(a), kind);
}

// Out-of-line jump table for MTableSwitch, emitted after the main code so the
// table of code pointers does not sit in the instruction stream.
class OutOfLineTableSwitch : public OutOfLineCodeBase<CodeGeneratorX86Shared> {
  MTableSwitch* mir_;
  CodeLabel jumpLabel_;

  void accept(CodeGeneratorX86Shared* codegen) override {
    codegen->visitOutOfLineTableSwitch(this);
  }

 public:
  explicit OutOfLineTableSwitch(MTableSwitch* mir) : mir_(mir) {}

  MTableSwitch* mir() const { return mir_; }

  CodeLabel* jumpLabel() { return &jumpLabel_; }
};

// Emits the jump table itself: one code pointer per case, pointer-aligned.
void CodeGeneratorX86Shared::visitOutOfLineTableSwitch(
    OutOfLineTableSwitch* ool) {
  MTableSwitch* mir = ool->mir();

  masm.haltingAlign(sizeof(void*));
  masm.bind(ool->jumpLabel());
  masm.addCodeLabel(*ool->jumpLabel());

  for (size_t i = 0; i < mir->numCases(); i++) {
    LBlock* caseblock = skipTrivialBlocks(mir->getCase(i))->lir();
    Label* caseheader = caseblock->label();
    uint32_t caseoffset = caseheader->offset();

    // The entries of the jump table need to be absolute addresses and thus
    // must be patched after codegen is finished.
    CodeLabel cl;
    masm.writeCodePointer(&cl);
    cl.target()->bind(caseoffset);
    masm.addCodeLabel(cl);
  }
}

// Emits the dispatch sequence for a table switch: bias and range-check
// |index|, then jump indirectly through the out-of-line table. Clobbers
// |index| and |base|.
void CodeGeneratorX86Shared::emitTableSwitchDispatch(MTableSwitch* mir,
                                                     Register index,
                                                     Register base) {
  Label* defaultcase = skipTrivialBlocks(mir->getDefault())->lir()->label();

  // Lower value with low value
  if (mir->low() != 0) {
    masm.subl(Imm32(mir->low()), index);
  }

  // Jump to default case if input is out of range
  int32_t cases = mir->numCases();
  masm.cmp32(index, Imm32(cases));
  masm.j(AssemblerX86Shared::AboveOrEqual, defaultcase);

  // To fill in the CodeLabels for the case entries, we need to first
  // generate the case entries (we don't yet know their offsets in the
  // instruction stream).
  OutOfLineTableSwitch* ool = new (alloc()) OutOfLineTableSwitch(mir);
  addOutOfLineCode(ool, mir);

  // Compute the position where a pointer to the right case stands.
  masm.mov(ool->jumpLabel(), base);
  BaseIndex pointer(base, index, ScalePointer);

  // Jump to the right case
  masm.branchToComputedAddress(pointer);
}

// Lowering for double-precision +, -, *, / using scalar SSE double ops.
void CodeGenerator::visitMathD(LMathD* math) {
  FloatRegister lhs = ToFloatRegister(math->lhs());
  Operand rhs = ToOperand(math->rhs());
  FloatRegister output = ToFloatRegister(math->output());

  switch (math->jsop()) {
    case JSOp::Add:
      masm.vaddsd(rhs, lhs, output);
      break;
    case JSOp::Sub:
      masm.vsubsd(rhs, lhs, output);
      break;
    case JSOp::Mul:
      masm.vmulsd(rhs, lhs, output);
      break;
    case JSOp::Div:
      masm.vdivsd(rhs, lhs, output);
      break;
    default:
      MOZ_CRASH("unexpected opcode");
  }
}

// Lowering for single-precision +, -, *, / using scalar SSE float ops.
void CodeGenerator::visitMathF(LMathF* math) {
  FloatRegister lhs = ToFloatRegister(math->lhs());
  Operand rhs = ToOperand(math->rhs());
  FloatRegister output = ToFloatRegister(math->output());

  switch (math->jsop()) {
    case JSOp::Add:
      masm.vaddss(rhs, lhs, output);
      break;
    case JSOp::Sub:
      masm.vsubss(rhs, lhs, output);
      break;
    case JSOp::Mul:
      masm.vmulss(rhs, lhs, output);
      break;
    case JSOp::Div:
      masm.vdivss(rhs, lhs, output);
      break;
    default:
      MOZ_CRASH("unexpected opcode");
  }
}

// Lowering for base + index*scale + displacement, folded into a single lea.
void CodeGenerator::visitEffectiveAddress3(LEffectiveAddress3* ins) {
  const MEffectiveAddress3* mir = ins->mir();
  Register base = ToRegister(ins->base());
  Register index = ToRegister(ins->index());
  Register output = ToRegister(ins->output());
  // Regarding performance, we rely on the fact that, if `mir->displacement()`
  // is zero, `masm` will generate a 2-addend `leal`, and not a 3-addend one
  // with a zero constant, since that is slower on some processors.
  // See comments in EffectiveAddressAnalysis.cpp.
  masm.leal(Operand(base, index, mir->scale(), mir->displacement()), output);
}

// Lowering for index*scale + displacement (no base register).
void CodeGenerator::visitEffectiveAddress2(LEffectiveAddress2* ins) {
  const MEffectiveAddress2* mir = ins->mir();
  Register index = ToRegister(ins->index());
  Register output = ToRegister(ins->output());
  masm.leal(Operand(index, mir->scale(), mir->displacement()), output);
}

// Emits the invalidation epilogue: padding for OsiPoint patching, then a
// patchable push of the IonScript pointer and a jump to the invalidator.
void CodeGeneratorX86Shared::generateInvalidateEpilogue() {
  // Ensure that there is enough space in the buffer for the OsiPoint
  // patching to occur. Otherwise, we could overwrite the invalidation
  // epilogue.
  for (size_t i = 0; i < sizeof(void*); i += Assembler::NopSize()) {
    masm.nop();
  }

  masm.bind(&invalidate_);

  // Push the Ion script onto the stack (when we determine what that pointer
  // is).
  invalidateEpilogueData_ = masm.pushWithPatch(ImmWord(uintptr_t(-1)));

  // Jump to the invalidator which will replace the current frame.
  TrampolinePtr thunk = gen->jitRuntime()->getInvalidationThunk();
  masm.jump(thunk);
}

// Lowering for 32-bit integer negation (in place).
void CodeGenerator::visitNegI(LNegI* ins) {
  Register input = ToRegister(ins->input());
  MOZ_ASSERT(input == ToRegister(ins->output()));

  masm.neg32(input);
}

// Lowering for 64-bit integer negation (in place).
void CodeGenerator::visitNegI64(LNegI64* ins) {
  Register64 input = ToRegister64(ins->input());
  MOZ_ASSERT(input == ToOutRegister64(ins));
  masm.neg64(input);
}

// Lowering for double negation: flip the sign bit with an XOR against -0.0.
void CodeGenerator::visitNegD(LNegD* ins) {
  FloatRegister input = ToFloatRegister(ins->input());
  FloatRegister output = ToFloatRegister(ins->output());

  // XOR the float in a float register with -0.0.
  masm.vxorpdSimd128(SimdConstant::SplatX2(-0.0), input, output);
}

// Lowering for float32 negation: flip the sign bit with an XOR against -0.0f.
void CodeGenerator::visitNegF(LNegF* ins) {
  FloatRegister input = ToFloatRegister(ins->input());
  FloatRegister output = ToFloatRegister(ins->output());

  // XOR the float in a float register with -0.0.
  masm.vxorpsSimd128(SimdConstant::SplatX4(-0.0f), input, output);
}

// Lowering for Atomics.compareExchange on typed-array elements: a fully
// fenced compare-exchange at the element's address.
void CodeGenerator::visitCompareExchangeTypedArrayElement(
    LCompareExchangeTypedArrayElement* lir) {
  Register elements = ToRegister(lir->elements());
  AnyRegister output = ToAnyRegister(lir->output());
  Register temp = ToTempRegisterOrInvalid(lir->temp0());

  Register oldval = ToRegister(lir->oldval());
  Register newval = ToRegister(lir->newval());

  Scalar::Type arrayType = lir->mir()->arrayType();

  // The index may be a constant (Address) or live in a register (BaseIndex).
  auto dest = ToAddressOrBaseIndex(elements, lir->index(), arrayType);

  dest.match([&](const auto& dest) {
    masm.compareExchangeJS(arrayType, Synchronization::Full(), dest, oldval,
                           newval, temp, output);
  });
}

// Lowering for Atomics.exchange on typed-array elements.
void CodeGenerator::visitAtomicExchangeTypedArrayElement(
    LAtomicExchangeTypedArrayElement* lir) {
  Register elements = ToRegister(lir->elements());
  AnyRegister output = ToAnyRegister(lir->output());
  Register temp = ToTempRegisterOrInvalid(lir->temp0());

  Register value = ToRegister(lir->value());

  Scalar::Type arrayType = lir->mir()->arrayType();

  auto dest = ToAddressOrBaseIndex(elements, lir->index(), arrayType);

  dest.match([&](const auto& dest) {
    masm.atomicExchangeJS(arrayType, Synchronization::Full(), dest, value, temp,
                          output);
  });
}

// Lowering for atomic read-modify-write ops on typed-array elements when the
// old value is observed (fetch-op form).
void CodeGenerator::visitAtomicTypedArrayElementBinop(
    LAtomicTypedArrayElementBinop* lir) {
  MOZ_ASSERT(!lir->mir()->isForEffect());

  AnyRegister output = ToAnyRegister(lir->output());
  Register elements = ToRegister(lir->elements());
  Register temp1 = ToTempRegisterOrInvalid(lir->temp0());
  Register temp2 = ToTempRegisterOrInvalid(lir->temp1());
  const LAllocation* value = lir->value();

  Scalar::Type arrayType = lir->mir()->arrayType();
  AtomicOp atomicOp = lir->mir()->operation();

  auto mem = ToAddressOrBaseIndex(elements, lir->index(), arrayType);

  mem.match([&](const auto& mem) {
    if (value->isConstant()) {
      masm.atomicFetchOpJS(arrayType, Synchronization::Full(), atomicOp,
                           Imm32(ToInt32(value)), mem, temp1, temp2, output);
    } else {
      masm.atomicFetchOpJS(arrayType, Synchronization::Full(), atomicOp,
                           ToRegister(value), mem, temp1, temp2, output);
    }
  });
}

// Same as above, but the old value is unobserved, so the effect-only helper
// (which needs no output or temps) can be used.
void CodeGenerator::visitAtomicTypedArrayElementBinopForEffect(
    LAtomicTypedArrayElementBinopForEffect* lir) {
  MOZ_ASSERT(lir->mir()->isForEffect());

  Register elements = ToRegister(lir->elements());
  const LAllocation* value = lir->value();
  Scalar::Type arrayType = lir->mir()->arrayType();
  AtomicOp atomicOp = lir->mir()->operation();

  auto mem = ToAddressOrBaseIndex(elements, lir->index(), arrayType);

  mem.match([&](const auto& mem) {
    if (value->isConstant()) {
      masm.atomicEffectOpJS(arrayType, Synchronization::Full(), atomicOp,
                            Imm32(ToInt32(value)), mem, InvalidReg);
    } else {
      masm.atomicEffectOpJS(arrayType, Synchronization::Full(), atomicOp,
                            ToRegister(value), mem, InvalidReg);
    }
  });
}

// Out-of-line slow path for wasm float->int truncation: dispatches to the
// MacroAssembler helper matching the (source, destination) type pair.
void CodeGeneratorX86Shared::visitOutOfLineWasmTruncateCheck(
    OutOfLineWasmTruncateCheck* ool) {
  FloatRegister input = ool->input();
  Register output = ool->output();
  Register64 output64 = ool->output64();
  MIRType fromType = ool->fromType();
  MIRType toType = ool->toType();
  Label* oolRejoin = ool->rejoin();
  TruncFlags flags = ool->flags();
  const wasm::TrapSiteDesc& trapSiteDesc = ool->trapSiteDesc();

  if (fromType == MIRType::Float32) {
    if (toType == MIRType::Int32) {
      masm.oolWasmTruncateCheckF32ToI32(input, output, flags, trapSiteDesc,
                                        oolRejoin);
    } else if (toType == MIRType::Int64) {
      masm.oolWasmTruncateCheckF32ToI64(input, output64, flags, trapSiteDesc,
                                        oolRejoin);
    } else {
      MOZ_CRASH("unexpected type");
    }
  } else if (fromType == MIRType::Double) {
    if (toType == MIRType::Int32) {
      masm.oolWasmTruncateCheckF64ToI32(input, output, flags, trapSiteDesc,
                                        oolRejoin);
    } else if (toType == MIRType::Int64) {
      masm.oolWasmTruncateCheckF64ToI64(input, output64, flags, trapSiteDesc,
                                        oolRejoin);
    } else {
      MOZ_CRASH("unexpected type");
    }
  } else {
    MOZ_CRASH("unexpected type");
  }
}

// Builds the memory operand for a wasm memory access: memoryBase (+ ptr)
// + disp. On x64 a bogus memoryBase denotes the implicit HeapReg.
template <typename T>
Operand CodeGeneratorX86Shared::toMemoryAccessOperand(T* lir, int32_t disp) {
  const LAllocation* ptr = lir->ptr();
  const LAllocation* memoryBase = lir->memoryBase();
#ifdef JS_CODEGEN_X86
  Operand destAddr = ptr->isBogus() ? Operand(ToRegister(memoryBase), disp)
                                    : Operand(ToRegister(memoryBase),
                                              ToRegister(ptr), TimesOne, disp);
#else
  auto baseReg = memoryBase->isBogus() ? HeapReg : ToRegister(memoryBase);
  Operand destAddr = ptr->isBogus()
                         ? Operand(baseReg, disp)
                         : Operand(baseReg, ToRegister(ptr), TimesOne, disp);
#endif
  return destAddr;
}

// Materializes a SIMD128 constant into a float register.
void CodeGenerator::visitSimd128(LSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  const LDefinition* out = ins->output();
  masm.loadConstantSimd128(ins->simd128(), ToFloatRegister(out));
#else
  MOZ_CRASH("No SIMD");
#endif
}

// Lowering for ternary SIMD ops: bitselect, relaxed fused multiply-add
// variants, and relaxed lane selects.
void CodeGenerator::visitWasmTernarySimd128(LWasmTernarySimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  switch (ins->simdOp()) {
    case wasm::SimdOp::V128Bitselect: {
      FloatRegister lhsDest = ToFloatRegister(ins->v0());
      FloatRegister rhs = ToFloatRegister(ins->v1());
      FloatRegister control = ToFloatRegister(ins->v2());
      FloatRegister temp = ToFloatRegister(ins->temp0());
      masm.bitwiseSelectSimd128(control, lhsDest, rhs, lhsDest, temp);
      break;
    }
    case wasm::SimdOp::F32x4RelaxedMadd:
      masm.fmaFloat32x4(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()),
                        ToFloatRegister(ins->v2()));
      break;
    case wasm::SimdOp::F32x4RelaxedNmadd:
      masm.fnmaFloat32x4(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()),
                         ToFloatRegister(ins->v2()));
      break;
    case wasm::SimdOp::F64x2RelaxedMadd:
      masm.fmaFloat64x2(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()),
                        ToFloatRegister(ins->v2()));
      break;
    case wasm::SimdOp::F64x2RelaxedNmadd:
      masm.fnmaFloat64x2(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()),
                         ToFloatRegister(ins->v2()));
      break;
    case wasm::SimdOp::I8x16RelaxedLaneSelect:
    case wasm::SimdOp::I16x8RelaxedLaneSelect:
    case wasm::SimdOp::I32x4RelaxedLaneSelect:
    case wasm::SimdOp::I64x2RelaxedLaneSelect: {
      FloatRegister lhs = ToFloatRegister(ins->v0());
      FloatRegister rhs = ToFloatRegister(ins->v1());
      FloatRegister mask = ToFloatRegister(ins->v2());
      FloatRegister dest = ToFloatRegister(ins->output());
      masm.laneSelectSimd128(mask, lhs,
rhs, dest); 2199 break; 2200 } 2201 case wasm::SimdOp::I32x4RelaxedDotI8x16I7x16AddS: 2202 masm.dotInt8x16Int7x16ThenAdd(ToFloatRegister(ins->v0()), 2203 ToFloatRegister(ins->v1()), 2204 ToFloatRegister(ins->v2())); 2205 break; 2206 default: 2207 MOZ_CRASH("NYI"); 2208 } 2209 #else 2210 MOZ_CRASH("No SIMD"); 2211 #endif 2212 } 2213 2214 void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) { 2215 #ifdef ENABLE_WASM_SIMD 2216 FloatRegister lhs = ToFloatRegister(ins->lhs()); 2217 FloatRegister rhs = ToFloatRegister(ins->rhs()); 2218 FloatRegister temp1 = ToTempFloatRegisterOrInvalid(ins->temp0()); 2219 FloatRegister temp2 = ToTempFloatRegisterOrInvalid(ins->temp1()); 2220 FloatRegister dest = ToFloatRegister(ins->output()); 2221 2222 switch (ins->simdOp()) { 2223 case wasm::SimdOp::V128And: 2224 masm.bitwiseAndSimd128(lhs, rhs, dest); 2225 break; 2226 case wasm::SimdOp::V128Or: 2227 masm.bitwiseOrSimd128(lhs, rhs, dest); 2228 break; 2229 case wasm::SimdOp::V128Xor: 2230 masm.bitwiseXorSimd128(lhs, rhs, dest); 2231 break; 2232 case wasm::SimdOp::V128AndNot: 2233 // x86/x64 specific: The CPU provides ~A & B. The operands were swapped 2234 // during lowering, and we'll compute A & ~B here as desired. 
2235 masm.bitwiseNotAndSimd128(lhs, rhs, dest); 2236 break; 2237 case wasm::SimdOp::I8x16AvgrU: 2238 masm.unsignedAverageInt8x16(lhs, rhs, dest); 2239 break; 2240 case wasm::SimdOp::I16x8AvgrU: 2241 masm.unsignedAverageInt16x8(lhs, rhs, dest); 2242 break; 2243 case wasm::SimdOp::I8x16Add: 2244 masm.addInt8x16(lhs, rhs, dest); 2245 break; 2246 case wasm::SimdOp::I8x16AddSatS: 2247 masm.addSatInt8x16(lhs, rhs, dest); 2248 break; 2249 case wasm::SimdOp::I8x16AddSatU: 2250 masm.unsignedAddSatInt8x16(lhs, rhs, dest); 2251 break; 2252 case wasm::SimdOp::I8x16Sub: 2253 masm.subInt8x16(lhs, rhs, dest); 2254 break; 2255 case wasm::SimdOp::I8x16SubSatS: 2256 masm.subSatInt8x16(lhs, rhs, dest); 2257 break; 2258 case wasm::SimdOp::I8x16SubSatU: 2259 masm.unsignedSubSatInt8x16(lhs, rhs, dest); 2260 break; 2261 case wasm::SimdOp::I8x16MinS: 2262 masm.minInt8x16(lhs, rhs, dest); 2263 break; 2264 case wasm::SimdOp::I8x16MinU: 2265 masm.unsignedMinInt8x16(lhs, rhs, dest); 2266 break; 2267 case wasm::SimdOp::I8x16MaxS: 2268 masm.maxInt8x16(lhs, rhs, dest); 2269 break; 2270 case wasm::SimdOp::I8x16MaxU: 2271 masm.unsignedMaxInt8x16(lhs, rhs, dest); 2272 break; 2273 case wasm::SimdOp::I16x8Add: 2274 masm.addInt16x8(lhs, rhs, dest); 2275 break; 2276 case wasm::SimdOp::I16x8AddSatS: 2277 masm.addSatInt16x8(lhs, rhs, dest); 2278 break; 2279 case wasm::SimdOp::I16x8AddSatU: 2280 masm.unsignedAddSatInt16x8(lhs, rhs, dest); 2281 break; 2282 case wasm::SimdOp::I16x8Sub: 2283 masm.subInt16x8(lhs, rhs, dest); 2284 break; 2285 case wasm::SimdOp::I16x8SubSatS: 2286 masm.subSatInt16x8(lhs, rhs, dest); 2287 break; 2288 case wasm::SimdOp::I16x8SubSatU: 2289 masm.unsignedSubSatInt16x8(lhs, rhs, dest); 2290 break; 2291 case wasm::SimdOp::I16x8Mul: 2292 masm.mulInt16x8(lhs, rhs, dest); 2293 break; 2294 case wasm::SimdOp::I16x8MinS: 2295 masm.minInt16x8(lhs, rhs, dest); 2296 break; 2297 case wasm::SimdOp::I16x8MinU: 2298 masm.unsignedMinInt16x8(lhs, rhs, dest); 2299 break; 2300 case 
wasm::SimdOp::I16x8MaxS: 2301 masm.maxInt16x8(lhs, rhs, dest); 2302 break; 2303 case wasm::SimdOp::I16x8MaxU: 2304 masm.unsignedMaxInt16x8(lhs, rhs, dest); 2305 break; 2306 case wasm::SimdOp::I32x4Add: 2307 masm.addInt32x4(lhs, rhs, dest); 2308 break; 2309 case wasm::SimdOp::I32x4Sub: 2310 masm.subInt32x4(lhs, rhs, dest); 2311 break; 2312 case wasm::SimdOp::I32x4Mul: 2313 masm.mulInt32x4(lhs, rhs, dest); 2314 break; 2315 case wasm::SimdOp::I32x4MinS: 2316 masm.minInt32x4(lhs, rhs, dest); 2317 break; 2318 case wasm::SimdOp::I32x4MinU: 2319 masm.unsignedMinInt32x4(lhs, rhs, dest); 2320 break; 2321 case wasm::SimdOp::I32x4MaxS: 2322 masm.maxInt32x4(lhs, rhs, dest); 2323 break; 2324 case wasm::SimdOp::I32x4MaxU: 2325 masm.unsignedMaxInt32x4(lhs, rhs, dest); 2326 break; 2327 case wasm::SimdOp::I64x2Add: 2328 masm.addInt64x2(lhs, rhs, dest); 2329 break; 2330 case wasm::SimdOp::I64x2Sub: 2331 masm.subInt64x2(lhs, rhs, dest); 2332 break; 2333 case wasm::SimdOp::I64x2Mul: 2334 masm.mulInt64x2(lhs, rhs, dest, temp1); 2335 break; 2336 case wasm::SimdOp::F32x4Add: 2337 masm.addFloat32x4(lhs, rhs, dest); 2338 break; 2339 case wasm::SimdOp::F32x4Sub: 2340 masm.subFloat32x4(lhs, rhs, dest); 2341 break; 2342 case wasm::SimdOp::F32x4Mul: 2343 masm.mulFloat32x4(lhs, rhs, dest); 2344 break; 2345 case wasm::SimdOp::F32x4Div: 2346 masm.divFloat32x4(lhs, rhs, dest); 2347 break; 2348 case wasm::SimdOp::F32x4Min: 2349 masm.minFloat32x4(lhs, rhs, dest, temp1, temp2); 2350 break; 2351 case wasm::SimdOp::F32x4Max: 2352 masm.maxFloat32x4(lhs, rhs, dest, temp1, temp2); 2353 break; 2354 case wasm::SimdOp::F64x2Add: 2355 masm.addFloat64x2(lhs, rhs, dest); 2356 break; 2357 case wasm::SimdOp::F64x2Sub: 2358 masm.subFloat64x2(lhs, rhs, dest); 2359 break; 2360 case wasm::SimdOp::F64x2Mul: 2361 masm.mulFloat64x2(lhs, rhs, dest); 2362 break; 2363 case wasm::SimdOp::F64x2Div: 2364 masm.divFloat64x2(lhs, rhs, dest); 2365 break; 2366 case wasm::SimdOp::F64x2Min: 2367 masm.minFloat64x2(lhs, rhs, dest, 
temp1, temp2); 2368 break; 2369 case wasm::SimdOp::F64x2Max: 2370 masm.maxFloat64x2(lhs, rhs, dest, temp1, temp2); 2371 break; 2372 case wasm::SimdOp::I8x16Swizzle: 2373 masm.swizzleInt8x16(lhs, rhs, dest); 2374 break; 2375 case wasm::SimdOp::I8x16RelaxedSwizzle: 2376 masm.swizzleInt8x16Relaxed(lhs, rhs, dest); 2377 break; 2378 case wasm::SimdOp::I8x16NarrowI16x8S: 2379 masm.narrowInt16x8(lhs, rhs, dest); 2380 break; 2381 case wasm::SimdOp::I8x16NarrowI16x8U: 2382 masm.unsignedNarrowInt16x8(lhs, rhs, dest); 2383 break; 2384 case wasm::SimdOp::I16x8NarrowI32x4S: 2385 masm.narrowInt32x4(lhs, rhs, dest); 2386 break; 2387 case wasm::SimdOp::I16x8NarrowI32x4U: 2388 masm.unsignedNarrowInt32x4(lhs, rhs, dest); 2389 break; 2390 case wasm::SimdOp::I8x16Eq: 2391 masm.compareInt8x16(Assembler::Equal, lhs, rhs, dest); 2392 break; 2393 case wasm::SimdOp::I8x16Ne: 2394 masm.compareInt8x16(Assembler::NotEqual, lhs, rhs, dest); 2395 break; 2396 case wasm::SimdOp::I8x16LtS: 2397 masm.compareInt8x16(Assembler::LessThan, lhs, rhs, dest); 2398 break; 2399 case wasm::SimdOp::I8x16GtS: 2400 masm.compareInt8x16(Assembler::GreaterThan, lhs, rhs, dest); 2401 break; 2402 case wasm::SimdOp::I8x16LeS: 2403 masm.compareInt8x16(Assembler::LessThanOrEqual, lhs, rhs, dest); 2404 break; 2405 case wasm::SimdOp::I8x16GeS: 2406 masm.compareInt8x16(Assembler::GreaterThanOrEqual, lhs, rhs, dest); 2407 break; 2408 case wasm::SimdOp::I8x16LtU: 2409 masm.compareInt8x16(Assembler::Below, lhs, rhs, dest); 2410 break; 2411 case wasm::SimdOp::I8x16GtU: 2412 masm.compareInt8x16(Assembler::Above, lhs, rhs, dest); 2413 break; 2414 case wasm::SimdOp::I8x16LeU: 2415 masm.compareInt8x16(Assembler::BelowOrEqual, lhs, rhs, dest); 2416 break; 2417 case wasm::SimdOp::I8x16GeU: 2418 masm.compareInt8x16(Assembler::AboveOrEqual, lhs, rhs, dest); 2419 break; 2420 case wasm::SimdOp::I16x8Eq: 2421 masm.compareInt16x8(Assembler::Equal, lhs, rhs, dest); 2422 break; 2423 case wasm::SimdOp::I16x8Ne: 2424 
masm.compareInt16x8(Assembler::NotEqual, lhs, rhs, dest); 2425 break; 2426 case wasm::SimdOp::I16x8LtS: 2427 masm.compareInt16x8(Assembler::LessThan, lhs, rhs, dest); 2428 break; 2429 case wasm::SimdOp::I16x8GtS: 2430 masm.compareInt16x8(Assembler::GreaterThan, lhs, rhs, dest); 2431 break; 2432 case wasm::SimdOp::I16x8LeS: 2433 masm.compareInt16x8(Assembler::LessThanOrEqual, lhs, rhs, dest); 2434 break; 2435 case wasm::SimdOp::I16x8GeS: 2436 masm.compareInt16x8(Assembler::GreaterThanOrEqual, lhs, rhs, dest); 2437 break; 2438 case wasm::SimdOp::I16x8LtU: 2439 masm.compareInt16x8(Assembler::Below, lhs, rhs, dest); 2440 break; 2441 case wasm::SimdOp::I16x8GtU: 2442 masm.compareInt16x8(Assembler::Above, lhs, rhs, dest); 2443 break; 2444 case wasm::SimdOp::I16x8LeU: 2445 masm.compareInt16x8(Assembler::BelowOrEqual, lhs, rhs, dest); 2446 break; 2447 case wasm::SimdOp::I16x8GeU: 2448 masm.compareInt16x8(Assembler::AboveOrEqual, lhs, rhs, dest); 2449 break; 2450 case wasm::SimdOp::I32x4Eq: 2451 masm.compareInt32x4(Assembler::Equal, lhs, rhs, dest); 2452 break; 2453 case wasm::SimdOp::I32x4Ne: 2454 masm.compareInt32x4(Assembler::NotEqual, lhs, rhs, dest); 2455 break; 2456 case wasm::SimdOp::I32x4LtS: 2457 masm.compareInt32x4(Assembler::LessThan, lhs, rhs, dest); 2458 break; 2459 case wasm::SimdOp::I32x4GtS: 2460 masm.compareInt32x4(Assembler::GreaterThan, lhs, rhs, dest); 2461 break; 2462 case wasm::SimdOp::I32x4LeS: 2463 masm.compareInt32x4(Assembler::LessThanOrEqual, lhs, rhs, dest); 2464 break; 2465 case wasm::SimdOp::I32x4GeS: 2466 masm.compareInt32x4(Assembler::GreaterThanOrEqual, lhs, rhs, dest); 2467 break; 2468 case wasm::SimdOp::I32x4LtU: 2469 masm.compareInt32x4(Assembler::Below, lhs, rhs, dest); 2470 break; 2471 case wasm::SimdOp::I32x4GtU: 2472 masm.compareInt32x4(Assembler::Above, lhs, rhs, dest); 2473 break; 2474 case wasm::SimdOp::I32x4LeU: 2475 masm.compareInt32x4(Assembler::BelowOrEqual, lhs, rhs, dest); 2476 break; 2477 case wasm::SimdOp::I32x4GeU: 2478 
masm.compareInt32x4(Assembler::AboveOrEqual, lhs, rhs, dest); 2479 break; 2480 case wasm::SimdOp::I64x2Eq: 2481 masm.compareForEqualityInt64x2(Assembler::Equal, lhs, rhs, dest); 2482 break; 2483 case wasm::SimdOp::I64x2Ne: 2484 masm.compareForEqualityInt64x2(Assembler::NotEqual, lhs, rhs, dest); 2485 break; 2486 case wasm::SimdOp::I64x2LtS: 2487 masm.compareForOrderingInt64x2(Assembler::LessThan, lhs, rhs, dest, temp1, 2488 temp2); 2489 break; 2490 case wasm::SimdOp::I64x2GtS: 2491 masm.compareForOrderingInt64x2(Assembler::GreaterThan, lhs, rhs, dest, 2492 temp1, temp2); 2493 break; 2494 case wasm::SimdOp::I64x2LeS: 2495 masm.compareForOrderingInt64x2(Assembler::LessThanOrEqual, lhs, rhs, dest, 2496 temp1, temp2); 2497 break; 2498 case wasm::SimdOp::I64x2GeS: 2499 masm.compareForOrderingInt64x2(Assembler::GreaterThanOrEqual, lhs, rhs, 2500 dest, temp1, temp2); 2501 break; 2502 case wasm::SimdOp::F32x4Eq: 2503 masm.compareFloat32x4(Assembler::Equal, lhs, rhs, dest); 2504 break; 2505 case wasm::SimdOp::F32x4Ne: 2506 masm.compareFloat32x4(Assembler::NotEqual, lhs, rhs, dest); 2507 break; 2508 case wasm::SimdOp::F32x4Lt: 2509 masm.compareFloat32x4(Assembler::LessThan, lhs, rhs, dest); 2510 break; 2511 case wasm::SimdOp::F32x4Le: 2512 masm.compareFloat32x4(Assembler::LessThanOrEqual, lhs, rhs, dest); 2513 break; 2514 case wasm::SimdOp::F64x2Eq: 2515 masm.compareFloat64x2(Assembler::Equal, lhs, rhs, dest); 2516 break; 2517 case wasm::SimdOp::F64x2Ne: 2518 masm.compareFloat64x2(Assembler::NotEqual, lhs, rhs, dest); 2519 break; 2520 case wasm::SimdOp::F64x2Lt: 2521 masm.compareFloat64x2(Assembler::LessThan, lhs, rhs, dest); 2522 break; 2523 case wasm::SimdOp::F64x2Le: 2524 masm.compareFloat64x2(Assembler::LessThanOrEqual, lhs, rhs, dest); 2525 break; 2526 case wasm::SimdOp::F32x4PMax: 2527 // `lhs` and `rhs` are swapped, for non-VEX platforms the output is rhs. 
2528 masm.pseudoMaxFloat32x4(lhs, rhs, dest); 2529 break; 2530 case wasm::SimdOp::F32x4PMin: 2531 // `lhs` and `rhs` are swapped, for non-VEX platforms the output is rhs. 2532 masm.pseudoMinFloat32x4(lhs, rhs, dest); 2533 break; 2534 case wasm::SimdOp::F64x2PMax: 2535 // `lhs` and `rhs` are swapped, for non-VEX platforms the output is rhs. 2536 masm.pseudoMaxFloat64x2(lhs, rhs, dest); 2537 break; 2538 case wasm::SimdOp::F64x2PMin: 2539 // `lhs` and `rhs` are swapped, for non-VEX platforms the output is rhs. 2540 masm.pseudoMinFloat64x2(lhs, rhs, dest); 2541 break; 2542 case wasm::SimdOp::I32x4DotI16x8S: 2543 masm.widenDotInt16x8(lhs, rhs, dest); 2544 break; 2545 case wasm::SimdOp::I16x8ExtmulLowI8x16S: 2546 masm.extMulLowInt8x16(lhs, rhs, dest); 2547 break; 2548 case wasm::SimdOp::I16x8ExtmulHighI8x16S: 2549 masm.extMulHighInt8x16(lhs, rhs, dest); 2550 break; 2551 case wasm::SimdOp::I16x8ExtmulLowI8x16U: 2552 masm.unsignedExtMulLowInt8x16(lhs, rhs, dest); 2553 break; 2554 case wasm::SimdOp::I16x8ExtmulHighI8x16U: 2555 masm.unsignedExtMulHighInt8x16(lhs, rhs, dest); 2556 break; 2557 case wasm::SimdOp::I32x4ExtmulLowI16x8S: 2558 masm.extMulLowInt16x8(lhs, rhs, dest); 2559 break; 2560 case wasm::SimdOp::I32x4ExtmulHighI16x8S: 2561 masm.extMulHighInt16x8(lhs, rhs, dest); 2562 break; 2563 case wasm::SimdOp::I32x4ExtmulLowI16x8U: 2564 masm.unsignedExtMulLowInt16x8(lhs, rhs, dest); 2565 break; 2566 case wasm::SimdOp::I32x4ExtmulHighI16x8U: 2567 masm.unsignedExtMulHighInt16x8(lhs, rhs, dest); 2568 break; 2569 case wasm::SimdOp::I64x2ExtmulLowI32x4S: 2570 masm.extMulLowInt32x4(lhs, rhs, dest); 2571 break; 2572 case wasm::SimdOp::I64x2ExtmulHighI32x4S: 2573 masm.extMulHighInt32x4(lhs, rhs, dest); 2574 break; 2575 case wasm::SimdOp::I64x2ExtmulLowI32x4U: 2576 masm.unsignedExtMulLowInt32x4(lhs, rhs, dest); 2577 break; 2578 case wasm::SimdOp::I64x2ExtmulHighI32x4U: 2579 masm.unsignedExtMulHighInt32x4(lhs, rhs, dest); 2580 break; 2581 case wasm::SimdOp::I16x8Q15MulrSatS: 2582 
masm.q15MulrSatInt16x8(lhs, rhs, dest); 2583 break; 2584 case wasm::SimdOp::F32x4RelaxedMin: 2585 masm.minFloat32x4Relaxed(lhs, rhs, dest); 2586 break; 2587 case wasm::SimdOp::F32x4RelaxedMax: 2588 masm.maxFloat32x4Relaxed(lhs, rhs, dest); 2589 break; 2590 case wasm::SimdOp::F64x2RelaxedMin: 2591 masm.minFloat64x2Relaxed(lhs, rhs, dest); 2592 break; 2593 case wasm::SimdOp::F64x2RelaxedMax: 2594 masm.maxFloat64x2Relaxed(lhs, rhs, dest); 2595 break; 2596 case wasm::SimdOp::I16x8RelaxedQ15MulrS: 2597 masm.q15MulrInt16x8Relaxed(lhs, rhs, dest); 2598 break; 2599 case wasm::SimdOp::I16x8RelaxedDotI8x16I7x16S: 2600 masm.dotInt8x16Int7x16(lhs, rhs, dest); 2601 break; 2602 case wasm::SimdOp::MozPMADDUBSW: 2603 masm.vpmaddubsw(rhs, lhs, dest); 2604 break; 2605 default: 2606 MOZ_CRASH("Binary SimdOp not implemented"); 2607 } 2608 #else 2609 MOZ_CRASH("No SIMD"); 2610 #endif 2611 } 2612 2613 void CodeGenerator::visitWasmBinarySimd128WithConstant( 2614 LWasmBinarySimd128WithConstant* ins) { 2615 #ifdef ENABLE_WASM_SIMD 2616 FloatRegister lhs = ToFloatRegister(ins->lhs()); 2617 const SimdConstant& rhs = ins->rhs(); 2618 FloatRegister dest = ToFloatRegister(ins->output()); 2619 FloatRegister temp = ToTempFloatRegisterOrInvalid(ins->temp0()); 2620 2621 switch (ins->mir()->simdOp()) { 2622 case wasm::SimdOp::I8x16Add: 2623 masm.addInt8x16(lhs, rhs, dest); 2624 break; 2625 case wasm::SimdOp::I16x8Add: 2626 masm.addInt16x8(lhs, rhs, dest); 2627 break; 2628 case wasm::SimdOp::I32x4Add: 2629 masm.addInt32x4(lhs, rhs, dest); 2630 break; 2631 case wasm::SimdOp::I64x2Add: 2632 masm.addInt64x2(lhs, rhs, dest); 2633 break; 2634 case wasm::SimdOp::I8x16Sub: 2635 masm.subInt8x16(lhs, rhs, dest); 2636 break; 2637 case wasm::SimdOp::I16x8Sub: 2638 masm.subInt16x8(lhs, rhs, dest); 2639 break; 2640 case wasm::SimdOp::I32x4Sub: 2641 masm.subInt32x4(lhs, rhs, dest); 2642 break; 2643 case wasm::SimdOp::I64x2Sub: 2644 masm.subInt64x2(lhs, rhs, dest); 2645 break; 2646 case wasm::SimdOp::I16x8Mul: 2647 
masm.mulInt16x8(lhs, rhs, dest); 2648 break; 2649 case wasm::SimdOp::I32x4Mul: 2650 masm.mulInt32x4(lhs, rhs, dest); 2651 break; 2652 case wasm::SimdOp::I8x16AddSatS: 2653 masm.addSatInt8x16(lhs, rhs, dest); 2654 break; 2655 case wasm::SimdOp::I8x16AddSatU: 2656 masm.unsignedAddSatInt8x16(lhs, rhs, dest); 2657 break; 2658 case wasm::SimdOp::I16x8AddSatS: 2659 masm.addSatInt16x8(lhs, rhs, dest); 2660 break; 2661 case wasm::SimdOp::I16x8AddSatU: 2662 masm.unsignedAddSatInt16x8(lhs, rhs, dest); 2663 break; 2664 case wasm::SimdOp::I8x16SubSatS: 2665 masm.subSatInt8x16(lhs, rhs, dest); 2666 break; 2667 case wasm::SimdOp::I8x16SubSatU: 2668 masm.unsignedSubSatInt8x16(lhs, rhs, dest); 2669 break; 2670 case wasm::SimdOp::I16x8SubSatS: 2671 masm.subSatInt16x8(lhs, rhs, dest); 2672 break; 2673 case wasm::SimdOp::I16x8SubSatU: 2674 masm.unsignedSubSatInt16x8(lhs, rhs, dest); 2675 break; 2676 case wasm::SimdOp::I8x16MinS: 2677 masm.minInt8x16(lhs, rhs, dest); 2678 break; 2679 case wasm::SimdOp::I8x16MinU: 2680 masm.unsignedMinInt8x16(lhs, rhs, dest); 2681 break; 2682 case wasm::SimdOp::I16x8MinS: 2683 masm.minInt16x8(lhs, rhs, dest); 2684 break; 2685 case wasm::SimdOp::I16x8MinU: 2686 masm.unsignedMinInt16x8(lhs, rhs, dest); 2687 break; 2688 case wasm::SimdOp::I32x4MinS: 2689 masm.minInt32x4(lhs, rhs, dest); 2690 break; 2691 case wasm::SimdOp::I32x4MinU: 2692 masm.unsignedMinInt32x4(lhs, rhs, dest); 2693 break; 2694 case wasm::SimdOp::I8x16MaxS: 2695 masm.maxInt8x16(lhs, rhs, dest); 2696 break; 2697 case wasm::SimdOp::I8x16MaxU: 2698 masm.unsignedMaxInt8x16(lhs, rhs, dest); 2699 break; 2700 case wasm::SimdOp::I16x8MaxS: 2701 masm.maxInt16x8(lhs, rhs, dest); 2702 break; 2703 case wasm::SimdOp::I16x8MaxU: 2704 masm.unsignedMaxInt16x8(lhs, rhs, dest); 2705 break; 2706 case wasm::SimdOp::I32x4MaxS: 2707 masm.maxInt32x4(lhs, rhs, dest); 2708 break; 2709 case wasm::SimdOp::I32x4MaxU: 2710 masm.unsignedMaxInt32x4(lhs, rhs, dest); 2711 break; 2712 case wasm::SimdOp::V128And: 2713 
masm.bitwiseAndSimd128(lhs, rhs, dest); 2714 break; 2715 case wasm::SimdOp::V128Or: 2716 masm.bitwiseOrSimd128(lhs, rhs, dest); 2717 break; 2718 case wasm::SimdOp::V128Xor: 2719 masm.bitwiseXorSimd128(lhs, rhs, dest); 2720 break; 2721 case wasm::SimdOp::I8x16Eq: 2722 masm.compareInt8x16(Assembler::Equal, lhs, rhs, dest); 2723 break; 2724 case wasm::SimdOp::I8x16Ne: 2725 masm.compareInt8x16(Assembler::NotEqual, lhs, rhs, dest); 2726 break; 2727 case wasm::SimdOp::I8x16GtS: 2728 masm.compareInt8x16(Assembler::GreaterThan, lhs, rhs, dest); 2729 break; 2730 case wasm::SimdOp::I8x16LeS: 2731 masm.compareInt8x16(Assembler::LessThanOrEqual, lhs, rhs, dest); 2732 break; 2733 case wasm::SimdOp::I16x8Eq: 2734 masm.compareInt16x8(Assembler::Equal, lhs, rhs, dest); 2735 break; 2736 case wasm::SimdOp::I16x8Ne: 2737 masm.compareInt16x8(Assembler::NotEqual, lhs, rhs, dest); 2738 break; 2739 case wasm::SimdOp::I16x8GtS: 2740 masm.compareInt16x8(Assembler::GreaterThan, lhs, rhs, dest); 2741 break; 2742 case wasm::SimdOp::I16x8LeS: 2743 masm.compareInt16x8(Assembler::LessThanOrEqual, lhs, rhs, dest); 2744 break; 2745 case wasm::SimdOp::I32x4Eq: 2746 masm.compareInt32x4(Assembler::Equal, lhs, rhs, dest); 2747 break; 2748 case wasm::SimdOp::I32x4Ne: 2749 masm.compareInt32x4(Assembler::NotEqual, lhs, rhs, dest); 2750 break; 2751 case wasm::SimdOp::I32x4GtS: 2752 masm.compareInt32x4(Assembler::GreaterThan, lhs, rhs, dest); 2753 break; 2754 case wasm::SimdOp::I32x4LeS: 2755 masm.compareInt32x4(Assembler::LessThanOrEqual, lhs, rhs, dest); 2756 break; 2757 case wasm::SimdOp::I64x2Mul: 2758 masm.mulInt64x2(lhs, rhs, dest, temp); 2759 break; 2760 case wasm::SimdOp::F32x4Eq: 2761 masm.compareFloat32x4(Assembler::Equal, lhs, rhs, dest); 2762 break; 2763 case wasm::SimdOp::F32x4Ne: 2764 masm.compareFloat32x4(Assembler::NotEqual, lhs, rhs, dest); 2765 break; 2766 case wasm::SimdOp::F32x4Lt: 2767 masm.compareFloat32x4(Assembler::LessThan, lhs, rhs, dest); 2768 break; 2769 case 
wasm::SimdOp::F32x4Le: 2770 masm.compareFloat32x4(Assembler::LessThanOrEqual, lhs, rhs, dest); 2771 break; 2772 case wasm::SimdOp::F64x2Eq: 2773 masm.compareFloat64x2(Assembler::Equal, lhs, rhs, dest); 2774 break; 2775 case wasm::SimdOp::F64x2Ne: 2776 masm.compareFloat64x2(Assembler::NotEqual, lhs, rhs, dest); 2777 break; 2778 case wasm::SimdOp::F64x2Lt: 2779 masm.compareFloat64x2(Assembler::LessThan, lhs, rhs, dest); 2780 break; 2781 case wasm::SimdOp::F64x2Le: 2782 masm.compareFloat64x2(Assembler::LessThanOrEqual, lhs, rhs, dest); 2783 break; 2784 case wasm::SimdOp::I32x4DotI16x8S: 2785 masm.widenDotInt16x8(lhs, rhs, dest); 2786 break; 2787 case wasm::SimdOp::F32x4Add: 2788 masm.addFloat32x4(lhs, rhs, dest); 2789 break; 2790 case wasm::SimdOp::F64x2Add: 2791 masm.addFloat64x2(lhs, rhs, dest); 2792 break; 2793 case wasm::SimdOp::F32x4Sub: 2794 masm.subFloat32x4(lhs, rhs, dest); 2795 break; 2796 case wasm::SimdOp::F64x2Sub: 2797 masm.subFloat64x2(lhs, rhs, dest); 2798 break; 2799 case wasm::SimdOp::F32x4Div: 2800 masm.divFloat32x4(lhs, rhs, dest); 2801 break; 2802 case wasm::SimdOp::F64x2Div: 2803 masm.divFloat64x2(lhs, rhs, dest); 2804 break; 2805 case wasm::SimdOp::F32x4Mul: 2806 masm.mulFloat32x4(lhs, rhs, dest); 2807 break; 2808 case wasm::SimdOp::F64x2Mul: 2809 masm.mulFloat64x2(lhs, rhs, dest); 2810 break; 2811 case wasm::SimdOp::I8x16NarrowI16x8S: 2812 masm.narrowInt16x8(lhs, rhs, dest); 2813 break; 2814 case wasm::SimdOp::I8x16NarrowI16x8U: 2815 masm.unsignedNarrowInt16x8(lhs, rhs, dest); 2816 break; 2817 case wasm::SimdOp::I16x8NarrowI32x4S: 2818 masm.narrowInt32x4(lhs, rhs, dest); 2819 break; 2820 case wasm::SimdOp::I16x8NarrowI32x4U: 2821 masm.unsignedNarrowInt32x4(lhs, rhs, dest); 2822 break; 2823 default: 2824 MOZ_CRASH("Binary SimdOp with constant not implemented"); 2825 } 2826 #else 2827 MOZ_CRASH("No SIMD"); 2828 #endif 2829 } 2830 2831 void CodeGenerator::visitWasmVariableShiftSimd128( 2832 LWasmVariableShiftSimd128* ins) { 2833 #ifdef 
ENABLE_WASM_SIMD 2834 FloatRegister lhsDest = ToFloatRegister(ins->lhs()); 2835 Register rhs = ToRegister(ins->rhs()); 2836 FloatRegister temp = ToTempFloatRegisterOrInvalid(ins->temp0()); 2837 2838 MOZ_ASSERT(ToFloatRegister(ins->output()) == lhsDest); 2839 2840 switch (ins->mir()->simdOp()) { 2841 case wasm::SimdOp::I8x16Shl: 2842 masm.leftShiftInt8x16(rhs, lhsDest, temp); 2843 break; 2844 case wasm::SimdOp::I8x16ShrS: 2845 masm.rightShiftInt8x16(rhs, lhsDest, temp); 2846 break; 2847 case wasm::SimdOp::I8x16ShrU: 2848 masm.unsignedRightShiftInt8x16(rhs, lhsDest, temp); 2849 break; 2850 case wasm::SimdOp::I16x8Shl: 2851 masm.leftShiftInt16x8(rhs, lhsDest); 2852 break; 2853 case wasm::SimdOp::I16x8ShrS: 2854 masm.rightShiftInt16x8(rhs, lhsDest); 2855 break; 2856 case wasm::SimdOp::I16x8ShrU: 2857 masm.unsignedRightShiftInt16x8(rhs, lhsDest); 2858 break; 2859 case wasm::SimdOp::I32x4Shl: 2860 masm.leftShiftInt32x4(rhs, lhsDest); 2861 break; 2862 case wasm::SimdOp::I32x4ShrS: 2863 masm.rightShiftInt32x4(rhs, lhsDest); 2864 break; 2865 case wasm::SimdOp::I32x4ShrU: 2866 masm.unsignedRightShiftInt32x4(rhs, lhsDest); 2867 break; 2868 case wasm::SimdOp::I64x2Shl: 2869 masm.leftShiftInt64x2(rhs, lhsDest); 2870 break; 2871 case wasm::SimdOp::I64x2ShrS: 2872 masm.rightShiftInt64x2(rhs, lhsDest, temp); 2873 break; 2874 case wasm::SimdOp::I64x2ShrU: 2875 masm.unsignedRightShiftInt64x2(rhs, lhsDest); 2876 break; 2877 default: 2878 MOZ_CRASH("Shift SimdOp not implemented"); 2879 } 2880 #else 2881 MOZ_CRASH("No SIMD"); 2882 #endif 2883 } 2884 2885 void CodeGenerator::visitWasmConstantShiftSimd128( 2886 LWasmConstantShiftSimd128* ins) { 2887 #ifdef ENABLE_WASM_SIMD 2888 FloatRegister src = ToFloatRegister(ins->src()); 2889 FloatRegister dest = ToFloatRegister(ins->output()); 2890 int32_t shift = ins->shift(); 2891 2892 if (shift == 0) { 2893 masm.moveSimd128(src, dest); 2894 return; 2895 } 2896 2897 switch (ins->mir()->simdOp()) { 2898 case wasm::SimdOp::I8x16Shl: 2899 
masm.leftShiftInt8x16(Imm32(shift), src, dest); 2900 break; 2901 case wasm::SimdOp::I8x16ShrS: 2902 masm.rightShiftInt8x16(Imm32(shift), src, dest); 2903 break; 2904 case wasm::SimdOp::I8x16ShrU: 2905 masm.unsignedRightShiftInt8x16(Imm32(shift), src, dest); 2906 break; 2907 case wasm::SimdOp::I16x8Shl: 2908 masm.leftShiftInt16x8(Imm32(shift), src, dest); 2909 break; 2910 case wasm::SimdOp::I16x8ShrS: 2911 masm.rightShiftInt16x8(Imm32(shift), src, dest); 2912 break; 2913 case wasm::SimdOp::I16x8ShrU: 2914 masm.unsignedRightShiftInt16x8(Imm32(shift), src, dest); 2915 break; 2916 case wasm::SimdOp::I32x4Shl: 2917 masm.leftShiftInt32x4(Imm32(shift), src, dest); 2918 break; 2919 case wasm::SimdOp::I32x4ShrS: 2920 masm.rightShiftInt32x4(Imm32(shift), src, dest); 2921 break; 2922 case wasm::SimdOp::I32x4ShrU: 2923 masm.unsignedRightShiftInt32x4(Imm32(shift), src, dest); 2924 break; 2925 case wasm::SimdOp::I64x2Shl: 2926 masm.leftShiftInt64x2(Imm32(shift), src, dest); 2927 break; 2928 case wasm::SimdOp::I64x2ShrS: 2929 masm.rightShiftInt64x2(Imm32(shift), src, dest); 2930 break; 2931 case wasm::SimdOp::I64x2ShrU: 2932 masm.unsignedRightShiftInt64x2(Imm32(shift), src, dest); 2933 break; 2934 default: 2935 MOZ_CRASH("Shift SimdOp not implemented"); 2936 } 2937 #else 2938 MOZ_CRASH("No SIMD"); 2939 #endif 2940 } 2941 2942 void CodeGenerator::visitWasmSignReplicationSimd128( 2943 LWasmSignReplicationSimd128* ins) { 2944 #ifdef ENABLE_WASM_SIMD 2945 FloatRegister src = ToFloatRegister(ins->src()); 2946 FloatRegister dest = ToFloatRegister(ins->output()); 2947 2948 switch (ins->mir()->simdOp()) { 2949 case wasm::SimdOp::I8x16ShrS: 2950 masm.signReplicationInt8x16(src, dest); 2951 break; 2952 case wasm::SimdOp::I16x8ShrS: 2953 masm.signReplicationInt16x8(src, dest); 2954 break; 2955 case wasm::SimdOp::I32x4ShrS: 2956 masm.signReplicationInt32x4(src, dest); 2957 break; 2958 case wasm::SimdOp::I64x2ShrS: 2959 masm.signReplicationInt64x2(src, dest); 2960 break; 2961 default: 2962 
MOZ_CRASH("Shift SimdOp unsupported sign replication optimization"); 2963 } 2964 #else 2965 MOZ_CRASH("No SIMD"); 2966 #endif 2967 } 2968 2969 void CodeGenerator::visitWasmShuffleSimd128(LWasmShuffleSimd128* ins) { 2970 #ifdef ENABLE_WASM_SIMD 2971 FloatRegister lhsDest = ToFloatRegister(ins->lhs()); 2972 FloatRegister rhs = ToFloatRegister(ins->rhs()); 2973 SimdConstant control = ins->control(); 2974 FloatRegister output = ToFloatRegister(ins->output()); 2975 switch (ins->op()) { 2976 case SimdShuffleOp::BLEND_8x16: { 2977 masm.blendInt8x16(reinterpret_cast<const uint8_t*>(control.asInt8x16()), 2978 lhsDest, rhs, output, ToFloatRegister(ins->temp0())); 2979 break; 2980 } 2981 case SimdShuffleOp::BLEND_16x8: { 2982 MOZ_ASSERT(ins->temp0()->isBogusTemp()); 2983 masm.blendInt16x8(reinterpret_cast<const uint16_t*>(control.asInt16x8()), 2984 lhsDest, rhs, output); 2985 break; 2986 } 2987 case SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16: { 2988 MOZ_ASSERT(ins->temp0()->isBogusTemp()); 2989 int8_t count = 16 - control.asInt8x16()[0]; 2990 MOZ_ASSERT(count > 0, "Should have been a MOVE operation"); 2991 masm.concatAndRightShiftSimd128(lhsDest, rhs, output, count); 2992 break; 2993 } 2994 case SimdShuffleOp::INTERLEAVE_HIGH_8x16: { 2995 MOZ_ASSERT(ins->temp0()->isBogusTemp()); 2996 masm.interleaveHighInt8x16(lhsDest, rhs, output); 2997 break; 2998 } 2999 case SimdShuffleOp::INTERLEAVE_HIGH_16x8: { 3000 MOZ_ASSERT(ins->temp0()->isBogusTemp()); 3001 masm.interleaveHighInt16x8(lhsDest, rhs, output); 3002 break; 3003 } 3004 case SimdShuffleOp::INTERLEAVE_HIGH_32x4: { 3005 MOZ_ASSERT(ins->temp0()->isBogusTemp()); 3006 masm.interleaveHighInt32x4(lhsDest, rhs, output); 3007 break; 3008 } 3009 case SimdShuffleOp::INTERLEAVE_HIGH_64x2: { 3010 MOZ_ASSERT(ins->temp0()->isBogusTemp()); 3011 masm.interleaveHighInt64x2(lhsDest, rhs, output); 3012 break; 3013 } 3014 case SimdShuffleOp::INTERLEAVE_LOW_8x16: { 3015 MOZ_ASSERT(ins->temp0()->isBogusTemp()); 3016 masm.interleaveLowInt8x16(lhsDest, 
rhs, output); 3017 break; 3018 } 3019 case SimdShuffleOp::INTERLEAVE_LOW_16x8: { 3020 MOZ_ASSERT(ins->temp0()->isBogusTemp()); 3021 masm.interleaveLowInt16x8(lhsDest, rhs, output); 3022 break; 3023 } 3024 case SimdShuffleOp::INTERLEAVE_LOW_32x4: { 3025 MOZ_ASSERT(ins->temp0()->isBogusTemp()); 3026 masm.interleaveLowInt32x4(lhsDest, rhs, output); 3027 break; 3028 } 3029 case SimdShuffleOp::INTERLEAVE_LOW_64x2: { 3030 MOZ_ASSERT(ins->temp0()->isBogusTemp()); 3031 masm.interleaveLowInt64x2(lhsDest, rhs, output); 3032 break; 3033 } 3034 case SimdShuffleOp::SHUFFLE_BLEND_8x16: { 3035 masm.shuffleInt8x16(reinterpret_cast<const uint8_t*>(control.asInt8x16()), 3036 lhsDest, rhs, output); 3037 break; 3038 } 3039 default: { 3040 MOZ_CRASH("Unsupported SIMD shuffle operation"); 3041 } 3042 } 3043 #else 3044 MOZ_CRASH("No SIMD"); 3045 #endif 3046 } 3047 3048 #ifdef ENABLE_WASM_SIMD 3049 3050 enum PermuteX64I16x8Action : uint16_t { 3051 UNAVAILABLE = 0, 3052 SWAP_QWORDS = 1, // Swap qwords first 3053 PERM_LOW = 2, // Permute low qword by control_[0..3] 3054 PERM_HIGH = 4 // Permute high qword by control_[4..7] 3055 }; 3056 3057 // Skip lanes that equal v starting at i, returning the index just beyond the 3058 // last of those. There is no requirement that the initial lanes[i] == v. 3059 template <typename T> 3060 static int ScanConstant(const T* lanes, int v, int i) { 3061 int len = int(16 / sizeof(T)); 3062 MOZ_ASSERT(i <= len); 3063 while (i < len && lanes[i] == v) { 3064 i++; 3065 } 3066 return i; 3067 } 3068 3069 // Apply a transformation to each lane value. 3070 template <typename T> 3071 static void MapLanes(T* result, const T* input, int (*f)(int)) { 3072 // Hazard analysis trips on "IndirectCall: f" error. 3073 // Suppress the check -- `f` is expected to be trivial here. 
JS::AutoSuppressGCAnalysis nogc;

  int len = int(16 / sizeof(T));
  for (int i = 0; i < len; i++) {
    result[i] = f(input[i]);
  }
}

// Recognize part of an identity permutation starting at start, with
// the first value of the permutation expected to be bias.
template <typename T>
static bool IsIdentity(const T* lanes, int start, int len, int bias) {
  if (lanes[start] != bias) {
    return false;
  }
  for (int i = start + 1; i < start + len; i++) {
    if (lanes[i] != lanes[i - 1] + 1) {
      return false;
    }
  }
  return true;
}

// We can permute by words if the mask is reducible to a word mask, but the x64
// lowering is only efficient if we can permute the high and low quadwords
// separately, possibly after swapping quadwords.
static PermuteX64I16x8Action CalculateX64Permute16x8(SimdConstant* control) {
  const SimdConstant::I16x8& lanes = control->asInt16x8();
  SimdConstant::I16x8 mapped;
  // Map each lane index to 0 (reads from the low qword) or 1 (high qword).
  MapLanes(mapped, lanes, [](int x) -> int { return x < 4 ? 0 : 1; });
  // The low four result lanes must all read from a single source qword...
  int i = ScanConstant(mapped, mapped[0], 0);
  if (i != 4) {
    return PermuteX64I16x8Action::UNAVAILABLE;
  }
  // ...and likewise the high four result lanes.
  i = ScanConstant(mapped, mapped[4], 4);
  if (i != 8) {
    return PermuteX64I16x8Action::UNAVAILABLE;
  }
  // Now compute the operation bits. `mapped` holds the adjusted lane mask.
  memcpy(mapped, lanes, sizeof(mapped));
  uint16_t op = 0;
  // If the low results read from the high qword (and vice versa), a qword
  // swap must be emitted first.
  if (mapped[0] > mapped[4]) {
    op |= PermuteX64I16x8Action::SWAP_QWORDS;
  }
  // Reduce each lane index to an index within its qword (0..3).
  for (auto& m : mapped) {
    m &= 3;
  }
  if (!IsIdentity(mapped, 0, 4, 0)) {
    op |= PermuteX64I16x8Action::PERM_LOW;
  }
  if (!IsIdentity(mapped, 4, 4, 0)) {
    op |= PermuteX64I16x8Action::PERM_HIGH;
  }
  MOZ_ASSERT(op != PermuteX64I16x8Action::UNAVAILABLE);
  // Hand the reduced (per-qword) mask back to the caller.
  *control = SimdConstant::CreateX8(mapped);
  return (PermuteX64I16x8Action)op;
}

#endif

// Emit a one-operand SIMD permutation with a constant control mask,
// dispatching on the operation classified as ins->op().
void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  FloatRegister src = ToFloatRegister(ins->src());
  FloatRegister dest = ToFloatRegister(ins->output());
  SimdConstant control = ins->control();
  switch (ins->op()) {
    // For broadcast, would MOVDDUP be better than PSHUFD for the last step?
    case SimdPermuteOp::BROADCAST_8x16: {
      const SimdConstant::I8x16& mask = control.asInt8x16();
      int8_t source = mask[0];
      if (source == 0 && Assembler::HasAVX2()) {
        masm.vbroadcastb(Operand(src), dest);
        break;
      }
      MOZ_ASSERT_IF(!Assembler::HasAVX(), src == dest);
      // Without a byte broadcast: duplicate the chosen byte into a word lane
      // by interleaving src with itself, then splat that word and finally the
      // containing dword across the whole vector.
      if (source < 8) {
        masm.interleaveLowInt8x16(src, src, dest);
      } else {
        masm.interleaveHighInt8x16(src, src, dest);
        source -= 8;
      }
      uint16_t v = uint16_t(source & 3);
      uint16_t wordMask[4] = {v, v, v, v};
      if (source < 4) {
        masm.permuteLowInt16x8(wordMask, dest, dest);
        uint32_t dwordMask[4] = {0, 0, 0, 0};
        masm.permuteInt32x4(dwordMask, dest, dest);
      } else {
        masm.permuteHighInt16x8(wordMask, dest, dest);
        uint32_t dwordMask[4] = {2, 2, 2, 2};
        masm.permuteInt32x4(dwordMask, dest, dest);
      }
      break;
    }
    case SimdPermuteOp::BROADCAST_16x8: {
      const SimdConstant::I16x8& mask = control.asInt16x8();
      int16_t source = mask[0];
      if (source == 0 && Assembler::HasAVX2())
{
        masm.vbroadcastw(Operand(src), dest);
        break;
      }
      // Splat the selected word within its qword, then splat the containing
      // dword across the whole vector.
      uint16_t v = uint16_t(source & 3);
      uint16_t wordMask[4] = {v, v, v, v};
      if (source < 4) {
        masm.permuteLowInt16x8(wordMask, src, dest);
        uint32_t dwordMask[4] = {0, 0, 0, 0};
        masm.permuteInt32x4(dwordMask, dest, dest);
      } else {
        masm.permuteHighInt16x8(wordMask, src, dest);
        uint32_t dwordMask[4] = {2, 2, 2, 2};
        masm.permuteInt32x4(dwordMask, dest, dest);
      }
      break;
    }
    case SimdPermuteOp::MOVE: {
      masm.moveSimd128(src, dest);
      break;
    }
    case SimdPermuteOp::PERMUTE_8x16: {
      const SimdConstant::I8x16& mask = control.asInt8x16();
#  ifdef DEBUG
      // An identity mask should have been classified as MOVE.
      DebugOnly<int> i;
      for (i = 0; i < 16 && mask[i] == i; i++) {
      }
      MOZ_ASSERT(i < 16, "Should have been a MOVE operation");
#  endif
      masm.permuteInt8x16(reinterpret_cast<const uint8_t*>(mask), src, dest);
      break;
    }
    case SimdPermuteOp::PERMUTE_16x8: {
#  ifdef DEBUG
      // An identity mask should have been classified as MOVE.
      const SimdConstant::I16x8& mask = control.asInt16x8();
      DebugOnly<int> i;
      for (i = 0; i < 8 && mask[i] == i; i++) {
      }
      MOZ_ASSERT(i < 8, "Should have been a MOVE operation");
#  endif
      // Try the cheap word-permute path; CalculateX64Permute16x8 rewrites
      // `control` into per-qword masks and tells us which steps to emit.
      PermuteX64I16x8Action op = CalculateX64Permute16x8(&control);
      if (op != PermuteX64I16x8Action::UNAVAILABLE) {
        const SimdConstant::I16x8& mask = control.asInt16x8();
        if (op & PermuteX64I16x8Action::SWAP_QWORDS) {
          uint32_t dwordMask[4] = {2, 3, 0, 1};
          masm.permuteInt32x4(dwordMask, src, dest);
          // Subsequent steps operate on the partial result.
          src = dest;
        }
        if (op & PermuteX64I16x8Action::PERM_LOW) {
          masm.permuteLowInt16x8(reinterpret_cast<const uint16_t*>(mask) + 0,
                                 src, dest);
          src = dest;
        }
        if (op & PermuteX64I16x8Action::PERM_HIGH) {
          masm.permuteHighInt16x8(reinterpret_cast<const uint16_t*>(mask) + 4,
                                  src, dest);
          src = dest;
        }
      } else {
        // Fall back to a byte permute: expand each word index into the two
        // byte indices it covers.
        const SimdConstant::I16x8& wmask = control.asInt16x8();
        uint8_t mask[16];
        for (unsigned i = 0; i < 16; i += 2) {
          mask[i] = wmask[i / 2] * 2;
          mask[i + 1] = wmask[i / 2] * 2 + 1;
        }
        masm.permuteInt8x16(mask, src, dest);
      }
      break;
    }
    case SimdPermuteOp::PERMUTE_32x4: {
      const SimdConstant::I32x4& mask = control.asInt32x4();
      // An all-zero mask is a lane-0 broadcast; use VBROADCASTD when AVX2 is
      // available.
      if (Assembler::HasAVX2() && mask[0] == 0 && mask[1] == 0 &&
          mask[2] == 0 && mask[3] == 0) {
        masm.vbroadcastd(Operand(src), dest);
        break;
      }
#  ifdef DEBUG
      // An identity mask should have been classified as MOVE.
      DebugOnly<int> i;
      for (i = 0; i < 4 && mask[i] == i; i++) {
      }
      MOZ_ASSERT(i < 4, "Should have been a MOVE operation");
#  endif
      masm.permuteInt32x4(reinterpret_cast<const uint32_t*>(mask), src, dest);
      break;
    }
    case SimdPermuteOp::ROTATE_RIGHT_8x16: {
      MOZ_ASSERT_IF(!Assembler::HasAVX(), src == dest);
      // A rotate is a concat of the vector with itself, right-shifted by the
      // rotation count.
      int8_t count = control.asInt8x16()[0];
      MOZ_ASSERT(count > 0, "Should have been a MOVE operation");
      masm.concatAndRightShiftSimd128(src, src, dest, count);
      break;
    }
    case SimdPermuteOp::SHIFT_LEFT_8x16: {
      int8_t count = control.asInt8x16()[0];
      MOZ_ASSERT(count > 0, "Should have been a MOVE operation");
      masm.leftShiftSimd128(Imm32(count), src, dest);
      break;
    }
    case SimdPermuteOp::SHIFT_RIGHT_8x16: {
      int8_t count = control.asInt8x16()[0];
      MOZ_ASSERT(count > 0, "Should have been a MOVE operation");
      masm.rightShiftSimd128(Imm32(count), src, dest);
      break;
    }
    case SimdPermuteOp::ZERO_EXTEND_8x16_TO_16x8:
      masm.zeroExtend8x16To16x8(src, dest);
      break;
    case SimdPermuteOp::ZERO_EXTEND_8x16_TO_32x4:
      masm.zeroExtend8x16To32x4(src, dest);
      break;
    case SimdPermuteOp::ZERO_EXTEND_8x16_TO_64x2:
      masm.zeroExtend8x16To64x2(src, dest);
      break;
    case SimdPermuteOp::ZERO_EXTEND_16x8_TO_32x4:
      masm.zeroExtend16x8To32x4(src, dest);
      break;
    case SimdPermuteOp::ZERO_EXTEND_16x8_TO_64x2:
      masm.zeroExtend16x8To64x2(src, dest);
      break;
    case SimdPermuteOp::ZERO_EXTEND_32x4_TO_64x2:
      masm.zeroExtend32x4To64x2(src, dest);
      break;
    case SimdPermuteOp::REVERSE_16x8:
      masm.reverseInt16x8(src, dest);
      break;
    case SimdPermuteOp::REVERSE_32x4:
      masm.reverseInt32x4(src, dest);
      break;
    case SimdPermuteOp::REVERSE_64x2:
      masm.reverseInt64x2(src, dest);
      break;
    default: {
      MOZ_CRASH("Unsupported SIMD permutation operation");
    }
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

// Replace one lane of a 128-bit vector with a scalar.  Integer lanes arrive
// in a GPR, float lanes in an FPR; I64x2 is handled separately below.
void CodeGenerator::visitWasmReplaceLaneSimd128(LWasmReplaceLaneSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  FloatRegister lhs = ToFloatRegister(ins->lhs());
  FloatRegister dest = ToFloatRegister(ins->output());
  const LAllocation* rhs = ins->rhs();
  uint32_t laneIndex = ins->mir()->laneIndex();

  switch (ins->mir()->simdOp()) {
    case wasm::SimdOp::I8x16ReplaceLane:
      masm.replaceLaneInt8x16(laneIndex, lhs, ToRegister(rhs), dest);
      break;
    case wasm::SimdOp::I16x8ReplaceLane:
      masm.replaceLaneInt16x8(laneIndex, lhs, ToRegister(rhs), dest);
      break;
    case wasm::SimdOp::I32x4ReplaceLane:
      masm.replaceLaneInt32x4(laneIndex, lhs, ToRegister(rhs), dest);
      break;
    case wasm::SimdOp::F32x4ReplaceLane:
      masm.replaceLaneFloat32x4(laneIndex, lhs, ToFloatRegister(rhs), dest);
      break;
    case wasm::SimdOp::F64x2ReplaceLane:
      masm.replaceLaneFloat64x2(laneIndex, lhs, ToFloatRegister(rhs), dest);
      break;
    default:
      MOZ_CRASH("ReplaceLane SimdOp not implemented");
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

// Replace one lane of an I64x2 vector; the scalar arrives as a Register64.
void CodeGenerator::visitWasmReplaceInt64LaneSimd128(
    LWasmReplaceInt64LaneSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  MOZ_RELEASE_ASSERT(ins->mir()->simdOp() == wasm::SimdOp::I64x2ReplaceLane);
  masm.replaceLaneInt64x2(ins->mir()->laneIndex(), ToFloatRegister(ins->lhs()),
                          ToRegister64(ins->rhs()),
ToFloatRegister(ins->output()));
#else
  MOZ_CRASH("No SIMD");
#endif
}

// Splat a scalar value into every lane of a 128-bit vector.
void CodeGenerator::visitWasmScalarToSimd128(LWasmScalarToSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  FloatRegister dest = ToFloatRegister(ins->output());

  switch (ins->mir()->simdOp()) {
    case wasm::SimdOp::I8x16Splat:
      masm.splatX16(ToRegister(ins->src()), dest);
      break;
    case wasm::SimdOp::I16x8Splat:
      masm.splatX8(ToRegister(ins->src()), dest);
      break;
    case wasm::SimdOp::I32x4Splat:
      masm.splatX4(ToRegister(ins->src()), dest);
      break;
    case wasm::SimdOp::F32x4Splat:
      masm.splatX4(ToFloatRegister(ins->src()), dest);
      break;
    case wasm::SimdOp::F64x2Splat:
      masm.splatX2(ToFloatRegister(ins->src()), dest);
      break;
    default:
      MOZ_CRASH("ScalarToSimd128 SimdOp not implemented");
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

// Build a vector from an int64 source: either splat it (I64x2Splat), or --
// for the V128LoadNxM cases, whose 64 bits were loaded into a GPR pair/reg
// upstream -- move the bits into the low half of the vector and widen.
void CodeGenerator::visitWasmInt64ToSimd128(LWasmInt64ToSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  Register64 src = ToRegister64(ins->src());
  FloatRegister dest = ToFloatRegister(ins->output());

  switch (ins->mir()->simdOp()) {
    case wasm::SimdOp::I64x2Splat:
      masm.splatX2(src, dest);
      break;
    case wasm::SimdOp::V128Load8x8S:
      masm.moveGPR64ToDouble(src, dest);
      masm.widenLowInt8x16(dest, dest);
      break;
    case wasm::SimdOp::V128Load8x8U:
      masm.moveGPR64ToDouble(src, dest);
      masm.unsignedWidenLowInt8x16(dest, dest);
      break;
    case wasm::SimdOp::V128Load16x4S:
      masm.moveGPR64ToDouble(src, dest);
      masm.widenLowInt16x8(dest, dest);
      break;
    case wasm::SimdOp::V128Load16x4U:
      masm.moveGPR64ToDouble(src, dest);
      masm.unsignedWidenLowInt16x8(dest, dest);
      break;
    case wasm::SimdOp::V128Load32x2S:
      masm.moveGPR64ToDouble(src, dest);
      masm.widenLowInt32x4(dest, dest);
      break;
    case wasm::SimdOp::V128Load32x2U:
      masm.moveGPR64ToDouble(src, dest);
      masm.unsignedWidenLowInt32x4(dest, dest);
      break;
    default:
      MOZ_CRASH("Int64ToSimd128 SimdOp not implemented");
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

// Unary SIMD operations: one vector in, one vector out.  A few cases need a
// scratch vector, supplied as temp0.
void CodeGenerator::visitWasmUnarySimd128(LWasmUnarySimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  FloatRegister src = ToFloatRegister(ins->src());
  FloatRegister dest = ToFloatRegister(ins->output());

  switch (ins->mir()->simdOp()) {
    case wasm::SimdOp::I8x16Neg:
      masm.negInt8x16(src, dest);
      break;
    case wasm::SimdOp::I16x8Neg:
      masm.negInt16x8(src, dest);
      break;
    case wasm::SimdOp::I16x8ExtendLowI8x16S:
      masm.widenLowInt8x16(src, dest);
      break;
    case wasm::SimdOp::I16x8ExtendHighI8x16S:
      masm.widenHighInt8x16(src, dest);
      break;
    case wasm::SimdOp::I16x8ExtendLowI8x16U:
      masm.unsignedWidenLowInt8x16(src, dest);
      break;
    case wasm::SimdOp::I16x8ExtendHighI8x16U:
      masm.unsignedWidenHighInt8x16(src, dest);
      break;
    case wasm::SimdOp::I32x4Neg:
      masm.negInt32x4(src, dest);
      break;
    case wasm::SimdOp::I32x4ExtendLowI16x8S:
      masm.widenLowInt16x8(src, dest);
      break;
    case wasm::SimdOp::I32x4ExtendHighI16x8S:
      masm.widenHighInt16x8(src, dest);
      break;
    case wasm::SimdOp::I32x4ExtendLowI16x8U:
      masm.unsignedWidenLowInt16x8(src, dest);
      break;
    case wasm::SimdOp::I32x4ExtendHighI16x8U:
      masm.unsignedWidenHighInt16x8(src, dest);
      break;
    case wasm::SimdOp::I32x4TruncSatF32x4S:
      masm.truncSatFloat32x4ToInt32x4(src, dest);
      break;
    case wasm::SimdOp::I32x4TruncSatF32x4U:
      // Unsigned saturating truncation needs a scratch vector.
      masm.unsignedTruncSatFloat32x4ToInt32x4(src, dest,
                                              ToFloatRegister(ins->temp0()));
      break;
    case wasm::SimdOp::I64x2Neg:
      masm.negInt64x2(src, dest);
      break;
    case wasm::SimdOp::I64x2ExtendLowI32x4S:
      masm.widenLowInt32x4(src, dest);
      break;
    case
wasm::SimdOp::I64x2ExtendHighI32x4S:
      masm.widenHighInt32x4(src, dest);
      break;
    case wasm::SimdOp::I64x2ExtendLowI32x4U:
      masm.unsignedWidenLowInt32x4(src, dest);
      break;
    case wasm::SimdOp::I64x2ExtendHighI32x4U:
      masm.unsignedWidenHighInt32x4(src, dest);
      break;
    case wasm::SimdOp::F32x4Abs:
      masm.absFloat32x4(src, dest);
      break;
    case wasm::SimdOp::F32x4Neg:
      masm.negFloat32x4(src, dest);
      break;
    case wasm::SimdOp::F32x4Sqrt:
      masm.sqrtFloat32x4(src, dest);
      break;
    case wasm::SimdOp::F32x4ConvertI32x4S:
      masm.convertInt32x4ToFloat32x4(src, dest);
      break;
    case wasm::SimdOp::F32x4ConvertI32x4U:
      masm.unsignedConvertInt32x4ToFloat32x4(src, dest);
      break;
    case wasm::SimdOp::F64x2Abs:
      masm.absFloat64x2(src, dest);
      break;
    case wasm::SimdOp::F64x2Neg:
      masm.negFloat64x2(src, dest);
      break;
    case wasm::SimdOp::F64x2Sqrt:
      masm.sqrtFloat64x2(src, dest);
      break;
    case wasm::SimdOp::V128Not:
      masm.bitwiseNotSimd128(src, dest);
      break;
    case wasm::SimdOp::I8x16Popcnt:
      // Per-byte popcount needs a scratch vector.
      masm.popcntInt8x16(src, dest, ToFloatRegister(ins->temp0()));
      break;
    case wasm::SimdOp::I8x16Abs:
      masm.absInt8x16(src, dest);
      break;
    case wasm::SimdOp::I16x8Abs:
      masm.absInt16x8(src, dest);
      break;
    case wasm::SimdOp::I32x4Abs:
      masm.absInt32x4(src, dest);
      break;
    case wasm::SimdOp::I64x2Abs:
      masm.absInt64x2(src, dest);
      break;
    case wasm::SimdOp::F32x4Ceil:
      masm.ceilFloat32x4(src, dest);
      break;
    case wasm::SimdOp::F32x4Floor:
      masm.floorFloat32x4(src, dest);
      break;
    case wasm::SimdOp::F32x4Trunc:
      masm.truncFloat32x4(src, dest);
      break;
    case wasm::SimdOp::F32x4Nearest:
      masm.nearestFloat32x4(src, dest);
      break;
    case wasm::SimdOp::F64x2Ceil:
      masm.ceilFloat64x2(src, dest);
      break;
    case wasm::SimdOp::F64x2Floor:
      masm.floorFloat64x2(src, dest);
      break;
    case wasm::SimdOp::F64x2Trunc:
      masm.truncFloat64x2(src, dest);
      break;
    case wasm::SimdOp::F64x2Nearest:
      masm.nearestFloat64x2(src, dest);
      break;
    case wasm::SimdOp::F32x4DemoteF64x2Zero:
      masm.convertFloat64x2ToFloat32x4(src, dest);
      break;
    case wasm::SimdOp::F64x2PromoteLowF32x4:
      masm.convertFloat32x4ToFloat64x2(src, dest);
      break;
    case wasm::SimdOp::F64x2ConvertLowI32x4S:
      masm.convertInt32x4ToFloat64x2(src, dest);
      break;
    case wasm::SimdOp::F64x2ConvertLowI32x4U:
      masm.unsignedConvertInt32x4ToFloat64x2(src, dest);
      break;
    case wasm::SimdOp::I32x4TruncSatF64x2SZero:
      // Saturating f64x2 truncation needs a scratch vector.
      masm.truncSatFloat64x2ToInt32x4(src, dest, ToFloatRegister(ins->temp0()));
      break;
    case wasm::SimdOp::I32x4TruncSatF64x2UZero:
      masm.unsignedTruncSatFloat64x2ToInt32x4(src, dest,
                                              ToFloatRegister(ins->temp0()));
      break;
    case wasm::SimdOp::I16x8ExtaddPairwiseI8x16S:
      masm.extAddPairwiseInt8x16(src, dest);
      break;
    case wasm::SimdOp::I16x8ExtaddPairwiseI8x16U:
      masm.unsignedExtAddPairwiseInt8x16(src, dest);
      break;
    case wasm::SimdOp::I32x4ExtaddPairwiseI16x8S:
      masm.extAddPairwiseInt16x8(src, dest);
      break;
    case wasm::SimdOp::I32x4ExtaddPairwiseI16x8U:
      masm.unsignedExtAddPairwiseInt16x8(src, dest);
      break;
    case wasm::SimdOp::I32x4RelaxedTruncF32x4S:
      masm.truncFloat32x4ToInt32x4Relaxed(src, dest);
      break;
    case wasm::SimdOp::I32x4RelaxedTruncF32x4U:
      masm.unsignedTruncFloat32x4ToInt32x4Relaxed(src, dest);
      break;
    case wasm::SimdOp::I32x4RelaxedTruncF64x2SZero:
      masm.truncFloat64x2ToInt32x4Relaxed(src, dest);
      break;
    case wasm::SimdOp::I32x4RelaxedTruncF64x2UZero:
      masm.unsignedTruncFloat64x2ToInt32x4Relaxed(src, dest);
      break;
    default:
      MOZ_CRASH("Unary SimdOp not implemented");
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

// Reduce a vector to a scalar result: any/all tests, bitmask extraction, and
// lane extraction (int lanes to a GPR, float lanes to an FPR).  `imm` carries
// the lane index for the extract cases.
void CodeGenerator::visitWasmReduceSimd128(LWasmReduceSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  FloatRegister src = ToFloatRegister(ins->src());
  const LDefinition* dest = ins->output();
  uint32_t imm = ins->mir()->imm();

  switch (ins->mir()->simdOp()) {
    case wasm::SimdOp::V128AnyTrue:
      masm.anyTrueSimd128(src, ToRegister(dest));
      break;
    case wasm::SimdOp::I8x16AllTrue:
      masm.allTrueInt8x16(src, ToRegister(dest));
      break;
    case wasm::SimdOp::I16x8AllTrue:
      masm.allTrueInt16x8(src, ToRegister(dest));
      break;
    case wasm::SimdOp::I32x4AllTrue:
      masm.allTrueInt32x4(src, ToRegister(dest));
      break;
    case wasm::SimdOp::I64x2AllTrue:
      masm.allTrueInt64x2(src, ToRegister(dest));
      break;
    case wasm::SimdOp::I8x16Bitmask:
      masm.bitmaskInt8x16(src, ToRegister(dest));
      break;
    case wasm::SimdOp::I16x8Bitmask:
      masm.bitmaskInt16x8(src, ToRegister(dest));
      break;
    case wasm::SimdOp::I32x4Bitmask:
      masm.bitmaskInt32x4(src, ToRegister(dest));
      break;
    case wasm::SimdOp::I64x2Bitmask:
      masm.bitmaskInt64x2(src, ToRegister(dest));
      break;
    case wasm::SimdOp::I8x16ExtractLaneS:
      masm.extractLaneInt8x16(imm, src, ToRegister(dest));
      break;
    case wasm::SimdOp::I8x16ExtractLaneU:
      masm.unsignedExtractLaneInt8x16(imm, src, ToRegister(dest));
      break;
    case wasm::SimdOp::I16x8ExtractLaneS:
      masm.extractLaneInt16x8(imm, src, ToRegister(dest));
      break;
    case wasm::SimdOp::I16x8ExtractLaneU:
      masm.unsignedExtractLaneInt16x8(imm, src, ToRegister(dest));
      break;
    case wasm::SimdOp::I32x4ExtractLane:
      masm.extractLaneInt32x4(imm, src, ToRegister(dest));
      break;
    case wasm::SimdOp::F32x4ExtractLane:
      masm.extractLaneFloat32x4(imm, src, ToFloatRegister(dest));
      break;
    case wasm::SimdOp::F64x2ExtractLane:
      masm.extractLaneFloat64x2(imm, src, ToFloatRegister(dest));
      break;
default:
      MOZ_CRASH("Reduce SimdOp not implemented");
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

// Fused reduce-and-branch: compute the reduction directly into the flags
// register and branch on it, avoiding a materialized boolean.
void CodeGenerator::visitWasmReduceAndBranchSimd128(
    LWasmReduceAndBranchSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  FloatRegister src = ToFloatRegister(ins->src());

  switch (ins->simdOp()) {
    case wasm::SimdOp::V128AnyTrue:
      // Set the zero flag if all of the lanes are zero, and branch on that.
      masm.vptest(src, src);
      emitBranch(Assembler::NotEqual, ins->ifTrue(), ins->ifFalse());
      break;
    case wasm::SimdOp::I8x16AllTrue:
    case wasm::SimdOp::I16x8AllTrue:
    case wasm::SimdOp::I32x4AllTrue:
    case wasm::SimdOp::I64x2AllTrue: {
      // Compare all lanes to zero, set the zero flag if none of the lanes are
      // zero, and branch on that.
      ScratchSimd128Scope tmp(masm);
      masm.vpxor(tmp, tmp, tmp);
      switch (ins->simdOp()) {
        case wasm::SimdOp::I8x16AllTrue:
          masm.vpcmpeqb(Operand(src), tmp, tmp);
          break;
        case wasm::SimdOp::I16x8AllTrue:
          masm.vpcmpeqw(Operand(src), tmp, tmp);
          break;
        case wasm::SimdOp::I32x4AllTrue:
          masm.vpcmpeqd(Operand(src), tmp, tmp);
          break;
        case wasm::SimdOp::I64x2AllTrue:
          masm.vpcmpeqq(Operand(src), tmp, tmp);
          break;
        default:
          MOZ_CRASH();
      }
      masm.vptest(tmp, tmp);
      emitBranch(Assembler::Equal, ins->ifTrue(), ins->ifFalse());
      break;
    }
    case wasm::SimdOp::I16x8Bitmask: {
      // Branch on whether any lane's sign bit (0x8000) is set.
      masm.bitwiseTestSimd128(SimdConstant::SplatX8(0x8000), src);
      emitBranch(Assembler::NotEqual, ins->ifTrue(), ins->ifFalse());
      break;
    }
    default:
      MOZ_CRASH("Reduce-and-branch SimdOp not implemented");
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

// Reduce a vector to an int64 scalar; currently only I64x2 lane extraction.
void CodeGenerator::visitWasmReduceSimd128ToInt64(
    LWasmReduceSimd128ToInt64* ins) {
#ifdef ENABLE_WASM_SIMD
  FloatRegister src = ToFloatRegister(ins->src());
  Register64 dest = ToOutRegister64(ins);
  uint32_t imm = ins->mir()->imm();

  switch (ins->mir()->simdOp()) {
    case wasm::SimdOp::I64x2ExtractLane:
      masm.extractLaneInt64x2(imm, src, dest);
      break;
    default:
      MOZ_CRASH("Reduce SimdOp not implemented");
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

// Load a scalar from memory into one lane of a vector.  Trap metadata is
// recorded at the offset of each potentially-faulting memory instruction.
void CodeGenerator::visitWasmLoadLaneSimd128(LWasmLoadLaneSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  const MWasmLoadLaneSimd128* mir = ins->mir();
  const wasm::MemoryAccessDesc& access = mir->access();

  access.assertOffsetInGuardPages();
  uint32_t offset = access.offset32();

  const LAllocation* value = ins->src();
  Operand srcAddr = toMemoryAccessOperand(ins, offset);

  switch (mir->laneSize()) {
    case 1: {
      masm.append(access, wasm::TrapMachineInsn::Load8,
                  FaultingCodeOffset(masm.currentOffset()));
      masm.vpinsrb(mir->laneIndex(), srcAddr, ToFloatRegister(value),
                   ToFloatRegister(value));
      break;
    }
    case 2: {
      masm.append(access, wasm::TrapMachineInsn::Load16,
                  FaultingCodeOffset(masm.currentOffset()));
      masm.vpinsrw(mir->laneIndex(), srcAddr, ToFloatRegister(value),
                   ToFloatRegister(value));
      break;
    }
    case 4: {
      masm.append(access, wasm::TrapMachineInsn::Load32,
                  FaultingCodeOffset(masm.currentOffset()));
      // VINSERTPS encodes the destination lane in bits 4-5 of the immediate.
      masm.vinsertps(mir->laneIndex() << 4, srcAddr, ToFloatRegister(value),
                     ToFloatRegister(value));
      break;
    }
    case 8: {
      masm.append(access, wasm::TrapMachineInsn::Load64,
                  FaultingCodeOffset(masm.currentOffset()));
      // MOVLPS/MOVHPS load into the low/high qword respectively.
      if (mir->laneIndex() == 0) {
        masm.vmovlps(srcAddr, ToFloatRegister(value), ToFloatRegister(value));
      } else {
        masm.vmovhps(srcAddr, ToFloatRegister(value), ToFloatRegister(value));
      }
      break;
    }
    default:
      MOZ_CRASH("Unsupported load lane size");
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

// Store one lane of a vector to memory.  Trap metadata is recorded at the
// offset of each potentially-faulting memory instruction.
void CodeGenerator::visitWasmStoreLaneSimd128(LWasmStoreLaneSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  const MWasmStoreLaneSimd128* mir = ins->mir();
  const wasm::MemoryAccessDesc& access = mir->access();

  access.assertOffsetInGuardPages();
  uint32_t offset = access.offset32();

  const LAllocation* src = ins->src();
  Operand destAddr = toMemoryAccessOperand(ins, offset);

  switch (mir->laneSize()) {
    case 1: {
      masm.append(access, wasm::TrapMachineInsn::Store8,
                  FaultingCodeOffset(masm.currentOffset()));
      masm.vpextrb(mir->laneIndex(), ToFloatRegister(src), destAddr);
      break;
    }
    case 2: {
      masm.append(access, wasm::TrapMachineInsn::Store16,
                  FaultingCodeOffset(masm.currentOffset()));
      masm.vpextrw(mir->laneIndex(), ToFloatRegister(src), destAddr);
      break;
    }
    case 4: {
      masm.append(access, wasm::TrapMachineInsn::Store32,
                  FaultingCodeOffset(masm.currentOffset()));
      // Lane 0 is a plain scalar store; other lanes use VEXTRACTPS.
      unsigned lane = mir->laneIndex();
      if (lane == 0) {
        masm.vmovss(ToFloatRegister(src), destAddr);
      } else {
        masm.vextractps(lane, ToFloatRegister(src), destAddr);
      }
      break;
    }
    case 8: {
      masm.append(access, wasm::TrapMachineInsn::Store64,
                  FaultingCodeOffset(masm.currentOffset()));
      // MOVLPS/MOVHPS store the low/high qword respectively.
      if (mir->laneIndex() == 0) {
        masm.vmovlps(ToFloatRegister(src), destAddr);
      } else {
        masm.vmovhps(ToFloatRegister(src), destAddr);
      }
      break;
    }
    default:
      MOZ_CRASH("Unsupported store lane size");
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

}  // namespace jit
}  // namespace js