commit 09e6a576a304ef061aac062e67b5b776653440b6
parent 81a2dd4de1abc2b3fdfb87a2bb509cd7e32142ed
Author: André Bargull <andre.bargull@gmail.com>
Date: Mon, 10 Nov 2025 15:13:43 +0000
Bug 1998457 - Part 3: Optimise int64 division and modulus with constants in x64. r=spidermonkey-reviewers,iain
These LIR instructions match their 32-bit counterparts in x86-shared.
Differential Revision: https://phabricator.services.mozilla.com/D271438
Diffstat:
3 files changed, 446 insertions(+), 12 deletions(-)
diff --git a/js/src/jit/LIROps.yaml b/js/src/jit/LIROps.yaml
@@ -4322,6 +4322,60 @@
num_temps: 1
mir_op: Mod
+- name: DivPowTwoI64
+ result_type: WordSized
+ operands:
+ numerator: WordSized
+ numeratorCopy: WordSized
+ arguments:
+ shift: int32_t
+ negativeDivisor: bool
+ mir_op: Div
+
+- name: ModPowTwoI64
+ result_type: WordSized
+ operands:
+ input: WordSized
+ arguments:
+ shift: int32_t
+ mir_op: Mod
+
+- name: DivConstantI64
+ result_type: WordSized
+ operands:
+ numerator: WordSized
+ arguments:
+ denominator: int64_t
+ num_temps: 1
+ mir_op: Div
+
+- name: ModConstantI64
+ result_type: WordSized
+ operands:
+ numerator: WordSized
+ arguments:
+ denominator: int64_t
+ num_temps: 1
+ mir_op: Mod
+
+- name: UDivConstantI64
+ result_type: WordSized
+ operands:
+ numerator: WordSized
+ arguments:
+ denominator: uint64_t
+ num_temps: 1
+ mir_op: Div
+
+- name: UModConstantI64
+ result_type: WordSized
+ operands:
+ numerator: WordSized
+ arguments:
+ denominator: uint64_t
+ num_temps: 1
+ mir_op: Mod
+
- name: WasmTruncateToInt64
result_type: Int64
operands:
diff --git a/js/src/jit/x64/CodeGenerator-x64.cpp b/js/src/jit/x64/CodeGenerator-x64.cpp
@@ -12,6 +12,7 @@
#include "jit/CodeGenerator.h"
#include "jit/MIR-wasm.h"
#include "jit/MIR.h"
+#include "jit/ReciprocalMulConstants.h"
#include "js/ScalarType.h" // js::Scalar::Type
#include "jit/MacroAssembler-inl.h"
@@ -308,6 +309,299 @@ void CodeGenerator::visitUModI64(LUModI64* lir) {
masm.udivq(rhs);
}
+// Emit an int64 division by a (possibly negative) power of two using shifts
+// instead of idivq. |shift| and |negativeDivisor| encode Abs(d) == 2^shift
+// and the sign of d; the numerator register is reused as the output.
+void CodeGenerator::visitDivPowTwoI64(LDivPowTwoI64* ins) {
+ Register lhs = ToRegister(ins->numerator());
+
+ int32_t shift = ins->shift();
+ bool negativeDivisor = ins->negativeDivisor();
+ MDiv* mir = ins->mir();
+
+ // We use defineReuseInput so these should always be the same, which is
+ // convenient since all of our instructions here are two-address.
+ MOZ_ASSERT(lhs == ToRegister(ins->output()));
+
+ // Unsigned division is just a right-shift.
+ if (mir->isUnsigned()) {
+ if (shift != 0) {
+ masm.shrq(Imm32(shift), lhs);
+ }
+ return;
+ }
+
+ if (shift != 0) {
+ // Adjust the value so that shifting produces a correctly rounded result
+ // when the numerator is negative.
+ // See 10-1 "Signed Division by a Known Power of 2" in Henry S. Warren,
+ // Jr.'s Hacker's Delight.
+ if (mir->canBeNegativeDividend()) {
+ Register lhsCopy = ToRegister(ins->numeratorCopy());
+ MOZ_ASSERT(lhsCopy != lhs);
+ if (shift > 1) {
+ // Copy the sign bit of the numerator. (= (2^64 - 1) or 0)
+ masm.sarq(Imm32(63), lhs);
+ }
+ // Divide by 2^(64 - shift)
+ // i.e. (= (2^64 - 1) / 2^(64 - shift) or 0)
+ // i.e. (= (2^shift - 1) or 0)
+ masm.shrq(Imm32(64 - shift), lhs);
+ // If signed, make any 1 bit below the shifted bits bubble up, such
+ // that once shifted the value would be rounded towards 0.
+ masm.addq(lhsCopy, lhs);
+ }
+ masm.sarq(Imm32(shift), lhs);
+ }
+
+ if (negativeDivisor) {
+ masm.negq(lhs);
+ }
+
+ if (shift == 0 && negativeDivisor) {
+ // shift == 0 && negativeDivisor means the divisor is -1, and
+ // INT64_MIN / -1 overflows. The flags here are those of the negq above.
+ Label ok;
+ masm.j(Assembler::NoOverflow, &ok);
+ masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->trapSiteDesc());
+ masm.bind(&ok);
+ }
+}
+
+// Emit an int64 modulus by a power of two (Abs(d) == 2^shift) using masking
+// instead of idivq. For a signed, possibly negative dividend the result must
+// carry the sign of the dividend, hence the negate/mask/negate sequence.
+void CodeGenerator::visitModPowTwoI64(LModPowTwoI64* ins) {
+ Register64 lhs = Register64(ToRegister(ins->input()));
+ int32_t shift = ins->shift();
+ bool canBeNegative =
+ !ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend();
+
+ MOZ_ASSERT(lhs.reg == ToRegister(ins->output()));
+
+ if (shift == 0) {
+ // Modulus by 1 (or -1) is always zero. On x64, writing the 32-bit
+ // register zero-extends, so xorl clears the full 64-bit register.
+ masm.xorl(lhs.reg, lhs.reg);
+ return;
+ }
+
+ // Keep only the low |shift| bits, i.e. lhs &= (2^shift - 1). The 8/16/32
+ // cases use zero-extending moves, which avoid a 64-bit immediate mask.
+ auto clearHighBits = [&]() {
+ switch (shift) {
+ case 8:
+ masm.movzbl(lhs.reg, lhs.reg);
+ break;
+ case 16:
+ masm.movzwl(lhs.reg, lhs.reg);
+ break;
+ case 32:
+ masm.movl(lhs.reg, lhs.reg);
+ break;
+ default:
+ masm.and64(Imm64((uint64_t(1) << shift) - 1), lhs);
+ break;
+ }
+ };
+
+ Label negative;
+
+ if (canBeNegative) {
+ // Switch based on sign of the lhs.
+ // Positive numbers are just a bitmask
+ masm.branchTest64(Assembler::Signed, lhs, lhs, &negative);
+ }
+
+ clearHighBits();
+
+ if (canBeNegative) {
+ Label done;
+ masm.jump(&done);
+
+ // Negative numbers need a negate, bitmask, negate
+ masm.bind(&negative);
+
+ // Unlike in the visitModI64 case, we are not computing the mod by means of
+ // a division. Therefore, the divisor = -1 case isn't problematic (the andq
+ // always returns 0, which is what we expect).
+ //
+ // The negq instruction overflows if lhs == INT64_MIN, but this is also not
+ // a problem: shift is at most 63, and so the andq also always returns 0.
+ masm.neg64(lhs);
+ clearHighBits();
+ masm.neg64(lhs);
+
+ masm.bind(&done);
+ }
+}
+
+// Compute the truncated signed division lhs / d into rdx via reciprocal
+// multiplication (Hacker's Delight, chapter 10). Shared by LDivConstantI64
+// and LModConstantI64, which both fix their output/temp to the rax/rdx pair.
+template <class LDivOrMod>
+static void Divide64WithConstant(MacroAssembler& masm, LDivOrMod* ins) {
+ Register lhs = ToRegister(ins->numerator());
+ [[maybe_unused]] Register output = ToRegister(ins->output());
+ [[maybe_unused]] Register temp = ToRegister(ins->temp0());
+ int64_t d = ins->denominator();
+
+ MOZ_ASSERT(lhs != rax && lhs != rdx);
+ MOZ_ASSERT((output == rax && temp == rdx) || (output == rdx && temp == rax));
+
+ // The absolute value of the denominator isn't a power of 2 (see LDivPowTwoI64
+ // and LModPowTwoI64).
+ MOZ_ASSERT(!mozilla::IsPowerOfTwo(mozilla::Abs(d)));
+
+ auto* mir = ins->mir();
+
+ // We will first divide by Abs(d), and negate the answer if d is negative.
+ // If desired, this can be avoided by generalizing computeDivisionConstants.
+ // The constants must be computed from Abs(d): passing a negative d directly
+ // would wrap to a huge unsigned divisor and yield wrong quotients.
+ auto rmc = ReciprocalMulConstants::computeSignedDivisionConstants(mozilla::Abs(d));
+
+ // We first compute (M * n) >> 64, where M = rmc.multiplier.
+ masm.movq(ImmWord(uint64_t(rmc.multiplier)), rax);
+ masm.imulq(lhs);
+ if (rmc.multiplier > __int128_t(INT64_MAX)) {
+ MOZ_ASSERT(rmc.multiplier < (__int128_t(1) << 64));
+
+ // We actually computed rdx = ((int64_t(M) * n) >> 64) instead. Since
+ // (M * n) >> 64 is the same as (rdx + n), we can correct for the overflow.
+ // (rdx + n) can't overflow, as n and rdx have opposite signs because
+ // int64_t(M) is negative.
+ masm.addq(lhs, rdx);
+ }
+ // (M * n) >> (64 + shift) is the truncated division answer if n is
+ // non-negative, as proved in the comments of computeDivisionConstants. We
+ // must add 1 later if n is negative to get the right answer in all cases.
+ if (rmc.shiftAmount > 0) {
+ masm.sarq(Imm32(rmc.shiftAmount), rdx);
+ }
+
+ // We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be
+ // computed with just a sign-extending shift of 63 bits.
+ if (mir->canBeNegativeDividend()) {
+ masm.movq(lhs, rax);
+ masm.sarq(Imm32(63), rax);
+ masm.subq(rax, rdx);
+ }
+
+ // After this, rdx contains the correct truncated division result.
+ if (d < 0) {
+ masm.negq(rdx);
+ }
+}
+
+void CodeGenerator::visitDivConstantI64(LDivConstantI64* ins) {
+ // Use the full 64-bit denominator. Truncating to int32_t would make the
+ // divide-by-zero check below misfire for non-zero multiples of 2^32
+ // (e.g. 3 * 2^32), emitting an unconditional trap for a valid divisor.
+ int64_t d = ins->denominator();
+
+ // This emits the division answer into rdx.
+ MOZ_ASSERT(ToRegister(ins->output()) == rdx);
+ MOZ_ASSERT(ToRegister(ins->temp0()) == rax);
+
+ if (d == 0) {
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc());
+ return;
+ }
+
+ // Compute the truncated division result in rdx.
+ Divide64WithConstant(masm, ins);
+}
+
+// Emit int64 modulus by a non-power-of-two constant: compute the truncated
+// quotient with Divide64WithConstant, then derive the remainder as
+// lhs - d * quotient.
+void CodeGenerator::visitModConstantI64(LModConstantI64* ins) {
+ Register lhs = ToRegister(ins->numerator());
+ int64_t d = ins->denominator();
+
+ // This emits the modulus answer into rax.
+ MOZ_ASSERT(ToRegister(ins->output()) == rax);
+ MOZ_ASSERT(ToRegister(ins->temp0()) == rdx);
+
+ if (d == 0) {
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc());
+ return;
+ }
+
+ // Compute the truncated division result in rdx.
+ Divide64WithConstant(masm, ins);
+
+ // Compute the remainder in |rax|: rax = lhs - d * rdx
+ masm.mul64(Imm64(d), Register64(rdx));
+ masm.movq(lhs, rax);
+ masm.subq(rdx, rax);
+}
+
+// Compute the truncated unsigned division lhs / d into rdx via reciprocal
+// multiplication. Shared by LUDivConstantI64 and LUModConstantI64, which
+// both fix their output/temp to the rax/rdx pair.
+template <class LUDivOrUMod>
+static void UnsignedDivide64WithConstant(MacroAssembler& masm,
+ LUDivOrUMod* ins) {
+ Register lhs = ToRegister(ins->numerator());
+ [[maybe_unused]] Register output = ToRegister(ins->output());
+ [[maybe_unused]] Register temp = ToRegister(ins->temp0());
+ uint64_t d = ins->denominator();
+
+ MOZ_ASSERT(lhs != rax && lhs != rdx);
+
+ // The denominator isn't a power of 2 (see LDivPowTwoI64 and LModPowTwoI64).
+ MOZ_ASSERT(!mozilla::IsPowerOfTwo(d));
+
+ auto rmc = ReciprocalMulConstants::computeUnsignedDivisionConstants(d);
+
+ // We first compute (M * n) >> 64, where M = rmc.multiplier.
+ masm.movq(ImmWord(uint64_t(rmc.multiplier)), rax);
+ masm.umulq(lhs);
+ if (rmc.multiplier > __int128_t(UINT64_MAX)) {
+ // M >= 2^64 and shift == 0 is impossible, as d >= 2 implies that
+ // ((M * n) >> (64 + shift)) >= n > floor(n/d) whenever n >= d,
+ // contradicting the proof of correctness in computeDivisionConstants.
+ MOZ_ASSERT(rmc.shiftAmount > 0);
+ MOZ_ASSERT(rmc.multiplier < (__int128_t(1) << 65));
+
+ // We actually computed rdx = ((uint64_t(M) * n) >> 64) instead. Since
+ // (M * n) >> (64 + shift) is the same as (rdx + n) >> shift, we can correct
+ // for the overflow. This case is a bit trickier than the signed case,
+ // though, as the (rdx + n) addition itself can overflow; however, note that
+ // (rdx + n) >> shift == (((n - rdx) >> 1) + rdx) >> (shift - 1),
+ // which is overflow-free. See Hacker's Delight, section 10-8 for details.
+
+ // Compute (n - rdx) >> 1 into temp.
+ masm.movq(lhs, rax);
+ masm.subq(rdx, rax);
+ masm.shrq(Imm32(1), rax);
+
+ // Finish the computation.
+ masm.addq(rax, rdx);
+ masm.shrq(Imm32(rmc.shiftAmount - 1), rdx);
+ } else {
+ if (rmc.shiftAmount > 0) {
+ masm.shrq(Imm32(rmc.shiftAmount), rdx);
+ }
+ }
+}
+
+// Emit uint64 division by a non-power-of-two constant; the quotient is
+// produced in rdx by UnsignedDivide64WithConstant. A zero denominator traps.
+void CodeGenerator::visitUDivConstantI64(LUDivConstantI64* ins) {
+ uint64_t d = ins->denominator();
+
+ // This emits the division answer into rdx.
+ MOZ_ASSERT(ToRegister(ins->output()) == rdx);
+ MOZ_ASSERT(ToRegister(ins->temp0()) == rax);
+
+ if (d == 0) {
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc());
+ return;
+ }
+
+ // Compute the truncated division result in rdx.
+ UnsignedDivide64WithConstant(masm, ins);
+}
+
+// Emit uint64 modulus by a non-power-of-two constant: compute the truncated
+// quotient with UnsignedDivide64WithConstant, then derive the remainder as
+// lhs - d * quotient.
+void CodeGenerator::visitUModConstantI64(LUModConstantI64* ins) {
+ Register lhs = ToRegister(ins->numerator());
+ uint64_t d = ins->denominator();
+
+ // This emits the modulus answer into rax.
+ MOZ_ASSERT(ToRegister(ins->output()) == rax);
+ MOZ_ASSERT(ToRegister(ins->temp0()) == rdx);
+
+ if (d == 0) {
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc());
+ return;
+ }
+
+ // Compute the truncated division result in rdx.
+ UnsignedDivide64WithConstant(masm, ins);
+
+ // Compute the remainder in |rax|: rax = lhs - d * rdx
+ masm.mul64(Imm64(d), Register64(rdx));
+ masm.movq(lhs, rax);
+ masm.subq(rdx, rax);
+}
+
void CodeGeneratorX64::emitBigIntPtrDiv(LBigIntPtrDiv* ins, Register dividend,
Register divisor, Register output) {
// Callers handle division by zero and integer overflow.
diff --git a/js/src/jit/x64/Lowering-x64.cpp b/js/src/jit/x64/Lowering-x64.cpp
@@ -7,6 +7,7 @@
#include "jit/x64/Lowering-x64.h"
#include "mozilla/CheckedInt.h"
+#include "mozilla/MathAlgorithms.h"
#include "jit/Lowering.h"
#include "jit/MIR-wasm.h"
@@ -526,35 +527,120 @@ void LIRGenerator::visitSubstr(MSubstr* ins) {
}
void LIRGeneratorX64::lowerDivI64(MDiv* div) {
+ // Division instructions are slow. Division by constant denominators can be
+ // rewritten to use other instructions.
+ if (div->rhs()->isConstant()) {
+ int64_t rhs = div->rhs()->toConstant()->toInt64();
+
+ // Division by powers of two can be done by shifting, and division by
+ // other numbers can be done by a reciprocal multiplication technique.
+ // Note: mozilla::Abs returns an unsigned value, so rhs == INT64_MIN
+ // (Abs(rhs) == 2^63) also takes the power-of-two path.
+ if (mozilla::IsPowerOfTwo(mozilla::Abs(rhs))) {
+ int32_t shift = mozilla::FloorLog2(mozilla::Abs(rhs));
+ LAllocation lhs = useRegisterAtStart(div->lhs());
+
+ // We have to round the result toward 0 when the remainder is non-zero.
+ // This requires an extra register to round up/down when the left-hand
+ // side is signed.
+ LAllocation lhsCopy = div->canBeNegativeDividend()
+ ? useRegister(div->lhs())
+ : LAllocation();
+
+ auto* lir = new (alloc()) LDivPowTwoI64(lhs, lhsCopy, shift, rhs < 0);
+ defineReuseInput(lir, div, 0);
+ return;
+ }
+
+ // Other constants (including 0, which traps) use reciprocal
+ // multiplication; the LIR node fixes rax/rdx itself.
+ auto* lir = new (alloc())
+ LDivConstantI64(useRegister(div->lhs()), tempFixed(rax), rhs);
+ defineFixed(lir, div, LAllocation(AnyRegister(rdx)));
+ return;
+ }
+
auto* lir = new (alloc()) LDivI64(useFixedAtStart(div->lhs(), rax),
useRegister(div->rhs()), tempFixed(rdx));
- defineInt64Fixed(lir, div, LInt64Allocation(LAllocation(AnyRegister(rax))));
-}
-
-void LIRGeneratorX64::lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div) {
- MOZ_CRASH("We don't use runtime div for this architecture");
+ defineFixed(lir, div, LAllocation(AnyRegister(rax)));
}
void LIRGeneratorX64::lowerModI64(MMod* mod) {
+ // As in lowerDivI64, modulus by a constant avoids the slow idivq.
+ if (mod->rhs()->isConstant()) {
+ int64_t rhs = mod->rhs()->toConstant()->toInt64();
+
+ // Power-of-two magnitudes reduce to masking (see visitModPowTwoI64).
+ if (mozilla::IsPowerOfTwo(mozilla::Abs(rhs))) {
+ int32_t shift = mozilla::FloorLog2(mozilla::Abs(rhs));
+
+ auto* lir =
+ new (alloc()) LModPowTwoI64(useRegisterAtStart(mod->lhs()), shift);
+ defineReuseInput(lir, mod, 0);
+ return;
+ }
+
+ // Other constants (including 0, which traps) use reciprocal
+ // multiplication; the LIR node fixes rax/rdx itself.
+ auto* lir = new (alloc())
+ LModConstantI64(useRegister(mod->lhs()), tempFixed(rdx), rhs);
+ defineFixed(lir, mod, LAllocation(AnyRegister(rax)));
+ return;
+ }
+
auto* lir = new (alloc()) LModI64(useFixedAtStart(mod->lhs(), rax),
useRegister(mod->rhs()), tempFixed(rax));
- defineInt64Fixed(lir, mod, LInt64Allocation(LAllocation(AnyRegister(rdx))));
-}
-
-void LIRGeneratorX64::lowerWasmBuiltinModI64(MWasmBuiltinModI64* mod) {
- MOZ_CRASH("We don't use runtime mod for this architecture");
+ defineFixed(lir, mod, LAllocation(AnyRegister(rdx)));
}
void LIRGeneratorX64::lowerUDivI64(MDiv* div) {
+ // Unsigned division by a constant avoids the slow hardware divide.
+ if (div->rhs()->isConstant()) {
+ // NOTE: the result of toInt64 is coerced to uint64_t, preserving the
+ // bit pattern of the constant.
+ uint64_t rhs = div->rhs()->toConstant()->toInt64();
+
+ // Unsigned division by 2^shift is a plain logical right-shift, so no
+ // numerator copy and no negative-divisor handling are needed.
+ if (mozilla::IsPowerOfTwo(rhs)) {
+ int32_t shift = mozilla::FloorLog2(rhs);
+
+ auto* lir = new (alloc()) LDivPowTwoI64(useRegisterAtStart(div->lhs()),
+ LAllocation(), shift, false);
+ defineReuseInput(lir, div, 0);
+ return;
+ }
+
+ auto* lir = new (alloc())
+ LUDivConstantI64(useRegister(div->lhs()), tempFixed(rax), rhs);
+ defineFixed(lir, div, LAllocation(AnyRegister(rdx)));
+ return;
+ }
+
auto* lir = new (alloc()) LUDivI64(useFixedAtStart(div->lhs(), rax),
useRegister(div->rhs()), tempFixed(rdx));
- defineInt64Fixed(lir, div, LInt64Allocation(LAllocation(AnyRegister(rax))));
+ defineFixed(lir, div, LAllocation(AnyRegister(rax)));
}
void LIRGeneratorX64::lowerUModI64(MMod* mod) {
+ // As in lowerUDivI64, unsigned modulus by a constant avoids the divide.
+ if (mod->rhs()->isConstant()) {
+ // NOTE: the result of toInt64 is coerced to uint64_t, preserving the
+ // bit pattern of the constant.
+ uint64_t rhs = mod->rhs()->toConstant()->toInt64();
+
+ // Unsigned modulus by 2^shift is a plain mask of the low bits.
+ if (mozilla::IsPowerOfTwo(rhs)) {
+ int32_t shift = mozilla::FloorLog2(rhs);
+
+ auto* lir =
+ new (alloc()) LModPowTwoI64(useRegisterAtStart(mod->lhs()), shift);
+ defineReuseInput(lir, mod, 0);
+ return;
+ }
+
+ auto* lir = new (alloc())
+ LUModConstantI64(useRegister(mod->lhs()), tempFixed(rdx), rhs);
+ defineFixed(lir, mod, LAllocation(AnyRegister(rax)));
+ return;
+ }
+
auto* lir = new (alloc()) LUModI64(useFixedAtStart(mod->lhs(), rax),
useRegister(mod->rhs()), tempFixed(rax));
- defineInt64Fixed(lir, mod, LInt64Allocation(LAllocation(AnyRegister(rdx))));
+ defineFixed(lir, mod, LAllocation(AnyRegister(rdx)));
}
+
+// x64 lowers all int64 div/mod to inline code above, so the runtime-call
+// fallbacks must never be reached on this architecture.
+void LIRGeneratorX64::lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div) {
+ MOZ_CRASH("We don't use runtime div for this architecture");
+}
+
+void LIRGeneratorX64::lowerWasmBuiltinModI64(MWasmBuiltinModI64* mod) {
+ MOZ_CRASH("We don't use runtime mod for this architecture");
+}
void LIRGeneratorX64::lowerBigIntPtrDiv(MBigIntPtrDiv* ins) {