commit cfd2a9855605d99afee434be297844f6ae095756
parent d907853cc7c8b338b65102a9e4ced6c554b208ac
Author: André Bargull <andre.bargull@gmail.com>
Date: Tue, 11 Nov 2025 12:29:34 +0000
Bug 1998161 - Part 3: Remove register constraints for div/mod with constants on x64. r=spidermonkey-reviewers,iain
Using `imulq` instead of `imull`/`umull` allows to use any register for
division by constants on x64.
Differential Revision: https://phabricator.services.mozilla.com/D271221
Diffstat:
2 files changed, 134 insertions(+), 53 deletions(-)
diff --git a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
@@ -987,14 +987,17 @@ void CodeGenerator::visitUMod(LUMod* ins) {
}
template <class LUDivOrUMod>
-static void UnsignedDivideWithConstant(MacroAssembler& masm, LUDivOrUMod* ins) {
+static void UnsignedDivideWithConstant(MacroAssembler& masm, LUDivOrUMod* ins,
+ Register result, Register temp) {
Register lhs = ToRegister(ins->numerator());
- [[maybe_unused]] Register output = ToRegister(ins->output());
- [[maybe_unused]] Register temp = ToRegister(ins->temp0());
uint32_t d = ins->denominator();
- MOZ_ASSERT(lhs != eax && lhs != edx);
- MOZ_ASSERT((output == eax && temp == edx) || (output == edx && temp == eax));
+ MOZ_ASSERT(lhs != result && lhs != temp);
+#ifdef JS_CODEGEN_X86
+ MOZ_ASSERT(result == edx && temp == eax);
+#else
+ MOZ_ASSERT(result != temp);
+#endif
// The denominator isn't a power of 2 (see LDivPowTwoI and LModPowTwoI).
MOZ_ASSERT(!mozilla::IsPowerOfTwo(d));
@@ -1002,8 +1005,23 @@ static void UnsignedDivideWithConstant(MacroAssembler& masm, LUDivOrUMod* ins) {
auto rmc = ReciprocalMulConstants::computeUnsignedDivisionConstants(d);
// We first compute (M * n) >> 32, where M = rmc.multiplier.
+#ifdef JS_CODEGEN_X86
masm.movl(Imm32(rmc.multiplier), eax);
masm.umull(lhs);
+#else
+ // Zero-extend |lhs| in preparation for a 64-bit multiplication.
+ masm.movl(lhs, result);
+
+ // Note that imul sign-extends its 32-bit immediate, but we need an unsigned
+ // multiplication.
+ if (int32_t(rmc.multiplier) >= 0) {
+ masm.imulq(Imm32(rmc.multiplier), result, result);
+ } else {
+ masm.movl(Imm32(rmc.multiplier), temp);
+ masm.imulq(temp, result);
+ }
+ masm.shrq(Imm32(32), result);
+#endif
if (rmc.multiplier > UINT32_MAX) {
// M >= 2^32 and shift == 0 is impossible, as d >= 2 implies that
// ((M * n) >> (32 + shift)) >= n > floor(n/d) whenever n >= d,
@@ -1011,35 +1029,40 @@ static void UnsignedDivideWithConstant(MacroAssembler& masm, LUDivOrUMod* ins) {
MOZ_ASSERT(rmc.shiftAmount > 0);
MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 33));
- // We actually computed edx = ((uint32_t(M) * n) >> 32) instead. Since
- // (M * n) >> (32 + shift) is the same as (edx + n) >> shift, we can
+ // We actually computed result = ((uint32_t(M) * n) >> 32) instead. Since
+ // (M * n) >> (32 + shift) is the same as (result + n) >> shift, we can
// correct for the overflow. This case is a bit trickier than the signed
- // case, though, as the (edx + n) addition itself can overflow; however,
- // note that (edx + n) >> shift == (((n - edx) >> 1) + edx) >> (shift - 1),
+ // case, though, as the (result + n) addition itself can overflow; however,
+ // note that
+ // (result + n) >> shift == (((n - result) >> 1) + result) >> (shift - 1),
// which is overflow-free. See Hacker's Delight, section 10-8 for details.
- // Compute (n - edx) >> 1 into eax.
- masm.movl(lhs, eax);
- masm.subl(edx, eax);
- masm.shrl(Imm32(1), eax);
+ // Compute (n - result) >> 1 into temp.
+ masm.movl(lhs, temp);
+ masm.subl(result, temp);
+ masm.shrl(Imm32(1), temp);
// Finish the computation.
- masm.addl(eax, edx);
- masm.shrl(Imm32(rmc.shiftAmount - 1), edx);
+ masm.addl(temp, result);
+ masm.shrl(Imm32(rmc.shiftAmount - 1), result);
} else {
- masm.shrl(Imm32(rmc.shiftAmount), edx);
+ masm.shrl(Imm32(rmc.shiftAmount), result);
}
}
void CodeGenerator::visitUDivConstant(LUDivConstant* ins) {
Register lhs = ToRegister(ins->numerator());
Register output = ToRegister(ins->output());
+ Register temp = ToRegister(ins->temp0());
uint32_t d = ins->denominator();
MDiv* mir = ins->mir();
+#ifdef JS_CODEGEN_X86
// This emits the division answer into edx.
MOZ_ASSERT(output == edx);
+ MOZ_ASSERT(temp == eax);
+#endif
if (d == 0) {
if (mir->trapOnError()) {
@@ -1052,24 +1075,28 @@ void CodeGenerator::visitUDivConstant(LUDivConstant* ins) {
return;
}
- // Compute the truncated division result in |edx|.
- UnsignedDivideWithConstant(masm, ins);
+ // Compute the truncated division result in |output|.
+ UnsignedDivideWithConstant(masm, ins, output, temp);
if (!mir->isTruncated()) {
- masm.imull(Imm32(d), edx, eax);
- bailoutCmp32(Assembler::NotEqual, lhs, eax, ins->snapshot());
+ masm.imull(Imm32(d), output, temp);
+ bailoutCmp32(Assembler::NotEqual, lhs, temp, ins->snapshot());
}
}
void CodeGenerator::visitUModConstant(LUModConstant* ins) {
Register lhs = ToRegister(ins->numerator());
Register output = ToRegister(ins->output());
+ Register temp = ToRegister(ins->temp0());
uint32_t d = ins->denominator();
MMod* mir = ins->mir();
+#ifdef JS_CODEGEN_X86
// This emits the modulus answer into eax.
MOZ_ASSERT(output == eax);
+ MOZ_ASSERT(temp == edx);
+#endif
if (d == 0) {
if (mir->trapOnError()) {
@@ -1082,17 +1109,17 @@ void CodeGenerator::visitUModConstant(LUModConstant* ins) {
return;
}
- // Compute the truncated division result in |edx|.
- UnsignedDivideWithConstant(masm, ins);
+ // Compute the truncated division result in |temp|.
+ UnsignedDivideWithConstant(masm, ins, temp, output);
- // We now have the truncated division value in edx. If we're computing a
+ // We now have the truncated division value in |temp|. If we're computing a
// modulus or checking whether the division resulted in an integer, we need
// to multiply the obtained value by d and finish the computation/check.
//
- // eax = lhs - d * edx
- masm.imull(Imm32(d), edx, edx);
- masm.movl(lhs, eax);
- masm.subl(edx, eax);
+ // output = lhs - d * temp
+ masm.imull(Imm32(d), temp, temp);
+ masm.movl(lhs, output);
+ masm.subl(temp, output);
// The final result of the modulus op, just computed above by the
// sub instruction, can be a number in the range [2^31, 2^32). If
@@ -1179,14 +1206,17 @@ void CodeGenerator::visitDivPowTwoI(LDivPowTwoI* ins) {
}
template <class LDivOrMod>
-static void DivideWithConstant(MacroAssembler& masm, LDivOrMod* ins) {
+static void DivideWithConstant(MacroAssembler& masm, LDivOrMod* ins,
+ Register result, Register temp) {
Register lhs = ToRegister(ins->numerator());
- [[maybe_unused]] Register output = ToRegister(ins->output());
- [[maybe_unused]] Register temp = ToRegister(ins->temp0());
int32_t d = ins->denominator();
- MOZ_ASSERT(lhs != eax && lhs != edx);
- MOZ_ASSERT((output == eax && temp == edx) || (output == edx && temp == eax));
+ MOZ_ASSERT(lhs != result && lhs != temp);
+#ifdef JS_CODEGEN_X86
+ MOZ_ASSERT(result == edx && temp == eax);
+#else
+ MOZ_ASSERT(result != temp);
+#endif
// The absolute value of the denominator isn't a power of 2 (see LDivPowTwoI
// and LModPowTwoI).
@@ -1199,45 +1229,56 @@ static void DivideWithConstant(MacroAssembler& masm, LDivOrMod* ins) {
auto rmc = ReciprocalMulConstants::computeSignedDivisionConstants(d);
// We first compute (M * n) >> 32, where M = rmc.multiplier.
+#ifdef JS_CODEGEN_X86
masm.movl(Imm32(rmc.multiplier), eax);
masm.imull(lhs);
+#else
+ // Sign-extend |lhs| in preparation for a 64-bit multiplication.
+ masm.movslq(lhs, result);
+ masm.imulq(Imm32(rmc.multiplier), result, result);
+ masm.shrq(Imm32(32), result);
+#endif
if (rmc.multiplier > INT32_MAX) {
MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 32));
- // We actually computed edx = ((int32_t(M) * n) >> 32) instead. Since
- // (M * n) >> 32 is the same as (edx + n), we can correct for the overflow.
- // (edx + n) can't overflow, as n and edx have opposite signs because
- // int32_t(M) is negative.
- masm.addl(lhs, edx);
+ // We actually computed result = ((int32_t(M) * n) >> 32) instead. Since
+ // (M * n) >> 32 is the same as (result + n), we can correct for the
+ // overflow. (result + n) can't overflow, as n and |result| have opposite
+ // signs because int32_t(M) is negative.
+ masm.addl(lhs, result);
}
// (M * n) >> (32 + shift) is the truncated division answer if n is
// non-negative, as proved in the comments of computeDivisionConstants. We
// must add 1 later if n is negative to get the right answer in all cases.
- masm.sarl(Imm32(rmc.shiftAmount), edx);
+ masm.sarl(Imm32(rmc.shiftAmount), result);
// We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be
// computed with just a sign-extending shift of 31 bits.
if (mir->canBeNegativeDividend()) {
- masm.movl(lhs, eax);
- masm.sarl(Imm32(31), eax);
- masm.subl(eax, edx);
+ masm.movl(lhs, temp);
+ masm.sarl(Imm32(31), temp);
+ masm.subl(temp, result);
}
- // After this, edx contains the correct truncated division result.
+ // After this, |result| contains the correct truncated division result.
if (d < 0) {
- masm.negl(edx);
+ masm.negl(result);
}
}
void CodeGenerator::visitDivConstantI(LDivConstantI* ins) {
Register lhs = ToRegister(ins->numerator());
Register output = ToRegister(ins->output());
+ Register temp = ToRegister(ins->temp0());
int32_t d = ins->denominator();
MDiv* mir = ins->mir();
+#ifdef JS_CODEGEN_X86
// This emits the division answer into edx.
MOZ_ASSERT(output == edx);
+ MOZ_ASSERT(temp == eax);
+#endif
if (d == 0) {
if (mir->trapOnError()) {
@@ -1250,14 +1291,14 @@ void CodeGenerator::visitDivConstantI(LDivConstantI* ins) {
return;
}
- // Compute the truncated division result in |edx|.
- DivideWithConstant(masm, ins);
+ // Compute the truncated division result in |output|.
+ DivideWithConstant(masm, ins, output, temp);
if (!mir->isTruncated()) {
// This is a division op. Multiply the obtained value by d to check if
// the correct answer is an integer. This cannot overflow, since |d| > 1.
- masm.imull(Imm32(d), edx, eax);
- bailoutCmp32(Assembler::NotEqual, lhs, eax, ins->snapshot());
+ masm.imull(Imm32(d), output, temp);
+ bailoutCmp32(Assembler::NotEqual, lhs, temp, ins->snapshot());
// If lhs is zero and the divisor is negative, the answer should have
// been -0.
@@ -1270,12 +1311,16 @@ void CodeGenerator::visitDivConstantI(LDivConstantI* ins) {
void CodeGenerator::visitModConstantI(LModConstantI* ins) {
Register lhs = ToRegister(ins->numerator());
Register output = ToRegister(ins->output());
+ Register temp = ToRegister(ins->temp0());
int32_t d = ins->denominator();
MMod* mir = ins->mir();
+#ifdef JS_CODEGEN_X86
// This emits the modulus answer into eax.
MOZ_ASSERT(output == eax);
+ MOZ_ASSERT(temp == edx);
+#endif
if (d == 0) {
if (mir->trapOnError()) {
@@ -1288,19 +1333,19 @@ void CodeGenerator::visitModConstantI(LModConstantI* ins) {
return;
}
- // Compute the truncated division result in |edx|.
- DivideWithConstant(masm, ins);
+ // Compute the truncated division result in |temp|.
+ DivideWithConstant(masm, ins, temp, output);
- // Compute the remainder in |eax|: eax = lhs - d * edx
- masm.imull(Imm32(-d), edx, eax);
- masm.addl(lhs, eax);
+ // Compute the remainder in |output|: output = lhs - d * temp
+ masm.imull(Imm32(-d), temp, output);
+ masm.addl(lhs, output);
if (!mir->isTruncated() && mir->canBeNegativeDividend()) {
// This is a mod op. If the computed value is zero and lhs
// is negative, the answer should have been -0.
Label done;
masm.branch32(Assembler::GreaterThanOrEqual, lhs, Imm32(0), &done);
- bailoutTest32(Assembler::Zero, eax, eax, ins->snapshot());
+ bailoutTest32(Assembler::Zero, output, output, ins->snapshot());
masm.bind(&done);
}
}
diff --git a/js/src/jit/x86-shared/Lowering-x86-shared.cpp b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
@@ -201,12 +201,21 @@ void LIRGeneratorX86Shared::lowerDivI(MDiv* div) {
return;
}
+#ifdef JS_CODEGEN_X86
auto* lir = new (alloc())
LDivConstantI(useRegister(div->lhs()), tempFixed(eax), rhs);
if (div->fallible()) {
assignSnapshot(lir, div->bailoutKind());
}
defineFixed(lir, div, LAllocation(AnyRegister(edx)));
+#else
+ auto* lir =
+ new (alloc()) LDivConstantI(useRegister(div->lhs()), temp(), rhs);
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ define(lir, div);
+#endif
return;
}
@@ -232,12 +241,21 @@ void LIRGeneratorX86Shared::lowerModI(MMod* mod) {
return;
}
+#ifdef JS_CODEGEN_X86
auto* lir = new (alloc())
LModConstantI(useRegister(mod->lhs()), tempFixed(edx), rhs);
if (mod->fallible()) {
assignSnapshot(lir, mod->bailoutKind());
}
defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
+#else
+ auto* lir =
+ new (alloc()) LModConstantI(useRegister(mod->lhs()), temp(), rhs);
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ define(lir, mod);
+#endif
return;
}
@@ -363,12 +381,21 @@ void LIRGeneratorX86Shared::lowerUDiv(MDiv* div) {
}
defineReuseInput(lir, div, 0);
} else {
+#ifdef JS_CODEGEN_X86
auto* lir = new (alloc())
LUDivConstant(useRegister(div->lhs()), tempFixed(eax), rhs);
if (div->fallible()) {
assignSnapshot(lir, div->bailoutKind());
}
defineFixed(lir, div, LAllocation(AnyRegister(edx)));
+#else
+ auto* lir =
+ new (alloc()) LUDivConstant(useRegister(div->lhs()), temp(), rhs);
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ define(lir, div);
+#endif
}
return;
}
@@ -394,12 +421,21 @@ void LIRGeneratorX86Shared::lowerUMod(MMod* mod) {
}
defineReuseInput(lir, mod, 0);
} else {
+#ifdef JS_CODEGEN_X86
auto* lir = new (alloc())
LUModConstant(useRegister(mod->lhs()), tempFixed(edx), rhs);
if (mod->fallible()) {
assignSnapshot(lir, mod->bailoutKind());
}
defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
+#else
+ auto* lir =
+ new (alloc()) LUModConstant(useRegister(mod->lhs()), temp(), rhs);
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ define(lir, mod);
+#endif
}
return;
}