commit 646effac1b0c6a423966751b0f086a0b154c20a4
parent 84f5d2f54b355cf304b9cf93a876abd4b74310b7
Author: André Bargull <andre.bargull@gmail.com>
Date: Sat, 15 Nov 2025 10:15:24 +0000
Bug 1998457 - Part 4: Optimise int64 division and modulus with constants in arm64. r=spidermonkey-reviewers,iain
Differential Revision: https://phabricator.services.mozilla.com/D271439
Diffstat:
3 files changed, 397 insertions(+), 14 deletions(-)
diff --git a/js/src/jit/LIROps.yaml b/js/src/jit/LIROps.yaml
@@ -4520,28 +4520,28 @@
#ifdef JS_CODEGEN_ARM64
- name: DivI64
- result_type: Int64
+ result_type: WordSized
operands:
lhs: WordSized
rhs: WordSized
mir_op: Div
- name: ModI64
- result_type: Int64
+ result_type: WordSized
operands:
lhs: WordSized
rhs: WordSized
mir_op: Mod
- name: UDivI64
- result_type: Int64
+ result_type: WordSized
operands:
lhs: WordSized
rhs: WordSized
mir_op: Div
- name: UModI64
- result_type: Int64
+ result_type: WordSized
operands:
lhs: WordSized
rhs: WordSized
@@ -4579,6 +4579,55 @@
denominator: uint32_t
mir_op: Mod
+- name: DivPowTwoI64
+ result_type: WordSized
+ operands:
+ numerator: WordSized
+ arguments:
+ shift: int32_t
+ negativeDivisor: bool
+ mir_op: Div
+
+- name: ModPowTwoI64
+ result_type: WordSized
+ operands:
+ input: WordSized
+ arguments:
+ shift: int32_t
+ mir_op: Mod
+
+- name: DivConstantI64
+ result_type: WordSized
+ operands:
+ numerator: WordSized
+ arguments:
+ denominator: int64_t
+ mir_op: Div
+
+- name: ModConstantI64
+ result_type: WordSized
+ operands:
+ numerator: WordSized
+ arguments:
+ denominator: int64_t
+ mir_op: Mod
+
+- name: UDivConstantI64
+ result_type: WordSized
+ operands:
+ numerator: WordSized
+ arguments:
+ denominator: uint64_t
+ mir_op: Div
+
+- name: UModConstantI64
+ result_type: WordSized
+ operands:
+ numerator: WordSized
+ arguments:
+ denominator: uint64_t
+ mir_op: Mod
+
- name: UDiv
result_type: WordSized
operands:
diff --git a/js/src/jit/arm64/CodeGenerator-arm64.cpp b/js/src/jit/arm64/CodeGenerator-arm64.cpp
@@ -506,6 +506,61 @@ void CodeGenerator::visitDivPowTwoI(LDivPowTwoI* ins) {
}
}
+void CodeGenerator::visitDivPowTwoI64(LDivPowTwoI64* ins) {
+ ARMRegister numerator64 = toXRegister(ins->numerator());
+ ARMRegister output64 = toXRegister(ins->output());
+
+ int32_t shift = ins->shift();
+ bool negativeDivisor = ins->negativeDivisor();
+ MDiv* mir = ins->mir();
+
+ if (shift) {
+ if (mir->isUnsigned()) {
+ // shift right
+ masm.Lsr(output64, numerator64, shift);
+ } else {
+ ARMRegister temp64 = numerator64;
+ // Adjust the value so that shifting produces a correctly
+ // rounded result when the numerator is negative. See 10-1
+ // "Signed Division by a Known Power of 2" in Henry
+ // S. Warren, Jr.'s Hacker's Delight.
+ if (mir->canBeNegativeDividend()) {
+ if (shift > 1) {
+ // Copy the sign bit of the numerator. (= (2^64 - 1) or 0)
+ masm.Asr(output64, numerator64, 63);
+ temp64 = output64;
+ }
+ // Divide by 2^(64 - shift)
+ // i.e. (= (2^64 - 1) / 2^(64 - shift) or 0)
+ // i.e. (= (2^shift - 1) or 0)
+ masm.Lsr(output64, temp64, 64 - shift);
+ // If signed, make any 1 bit below the shifted bits to bubble up, such
+ // that once shifted the value would be rounded towards 0.
+ masm.Add(output64, output64, numerator64);
+ temp64 = output64;
+ }
+ masm.Asr(output64, temp64, shift);
+
+ if (negativeDivisor) {
+ masm.Neg(output64, output64);
+ }
+ }
+ return;
+ }
+
+ if (negativeDivisor) {
+ // INT64_MIN / -1 overflows.
+ Label ok;
+ masm.Negs(output64, numerator64);
+ masm.branch(Assembler::NoOverflow, &ok);
+ masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->trapSiteDesc());
+ masm.bind(&ok);
+ } else {
+ // Copy the result.
+ masm.Mov(output64, numerator64);
+ }
+}
+
template <class LDivOrMod>
static void DivideWithConstant(MacroAssembler& masm, LDivOrMod* ins) {
ARMRegister lhs32 = toWRegister(ins->numerator());
@@ -553,8 +608,7 @@ static void DivideWithConstant(MacroAssembler& masm, LDivOrMod* ins) {
// We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be
// computed with just a sign-extending shift of 31 bits.
if (mir->canBeNegativeDividend()) {
- masm.Asr(const32, lhs32, 31);
- masm.Sub(output32, output32, const32);
+ masm.Sub(output32, output32, Operand(lhs32, vixl::ASR, 31));
}
// After this, output32 contains the correct truncated division result.
@@ -700,6 +754,119 @@ void CodeGenerator::visitUDivConstant(LUDivConstant* ins) {
}
}
+template <class LDivOrMod>
+static void Divide64WithConstant(MacroAssembler& masm, LDivOrMod* ins) {
+ ARMRegister lhs64 = toXRegister(ins->numerator());
+ ARMRegister output64 = toXRegister(ins->output());
+ int64_t d = ins->denominator();
+
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ ARMRegister const64 = temps.AcquireX();
+
+ // The absolute value of the denominator isn't a power of 2.
+ MOZ_ASSERT(!mozilla::IsPowerOfTwo(mozilla::Abs(d)));
+
+ auto* mir = ins->mir();
+
+ // We will first divide by Abs(d), and negate the answer if d is negative.
+ // If desired, this can be avoided by generalizing computeDivisionConstants.
+ auto rmc = ReciprocalMulConstants::computeSignedDivisionConstants(d);
+
+ // We first compute (M * n) >> 64, where M = rmc.multiplier.
+ masm.Mov(const64, uint64_t(rmc.multiplier));
+ masm.Smulh(output64, lhs64, const64);
+ if (rmc.multiplier > Int128(INT64_MAX)) {
+ MOZ_ASSERT(rmc.multiplier < (Int128(1) << 64));
+
+ // We actually computed output = ((int64_t(M) * n) >> 64) instead. Since
+ // (M * n) >> 64 is the same as (output + n), we can correct for the
+ // overflow. (output + n) can't overflow, as n and output have opposite
+ // signs because int64_t(M) is negative.
+ masm.Add(output64, output64, lhs64);
+ }
+
+ // (M * n) >> (64 + shift) is the truncated division answer if n is
+ // non-negative, as proved in the comments of computeDivisionConstants. We
+ // must add 1 later if n is negative to get the right answer in all cases.
+ masm.Asr(output64, output64, rmc.shiftAmount);
+
+ // We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be
+ // computed with just a sign-extending shift of 63 bits.
+ if (mir->canBeNegativeDividend()) {
+ masm.Sub(output64, output64, Operand(lhs64, vixl::ASR, 63));
+ }
+
+ // After this, output64 contains the correct truncated division result.
+ if (d < 0) {
+ masm.Neg(output64, output64);
+ }
+}
+
+void CodeGenerator::visitDivConstantI64(LDivConstantI64* ins) {
+ int64_t d = ins->denominator();
+
+ if (d == 0) {
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc());
+ return;
+ }
+
+ // Compute the truncated division result.
+ Divide64WithConstant(masm, ins);
+}
+
+template <class LUDivOrUMod>
+static void UnsignedDivide64WithConstant(MacroAssembler& masm,
+ LUDivOrUMod* ins) {
+ ARMRegister lhs64 = toXRegister(ins->numerator());
+ ARMRegister output64 = toXRegister(ins->output());
+ uint64_t d = ins->denominator();
+
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ ARMRegister const64 = temps.AcquireX();
+
+ // The denominator isn't a power of 2 (see LDivPowTwoI).
+ MOZ_ASSERT(!mozilla::IsPowerOfTwo(d));
+
+ auto rmc = ReciprocalMulConstants::computeUnsignedDivisionConstants(d);
+
+ // We first compute (M * n) >> 64, where M = rmc.multiplier.
+ masm.Mov(const64, uint64_t(rmc.multiplier));
+ masm.Umulh(output64, lhs64, const64);
+ if (rmc.multiplier > Int128(UINT64_MAX)) {
+ // M >= 2^64 and shift == 0 is impossible, as d >= 2 implies that
+ // ((M * n) >> (64 + shift)) >= n > floor(n/d) whenever n >= d,
+ // contradicting the proof of correctness in computeDivisionConstants.
+ MOZ_ASSERT(rmc.shiftAmount > 0);
+ MOZ_ASSERT(rmc.multiplier < (Int128(1) << 65));
+
+ // We actually computed output = ((uint64_t(M) * n) >> 64) instead. Since
+ // (M * n) >> (64 + shift) is the same as (output + n) >> shift, we can
+ // correct for the overflow. This case is a bit trickier than the signed
+ // case, though, as the (output + n) addition itself can overflow; however,
+ // note that
+ // (output + n) >> shift == (((n - output) >> 1) + output) >> (shift - 1),
+ // which is overflow-free. See Hacker's Delight, section 10-8 for details.
+
+ masm.Sub(const64, lhs64, output64);
+ masm.Add(output64, output64, Operand(const64, vixl::LSR, 1));
+ masm.Lsr(output64, output64, rmc.shiftAmount - 1);
+ } else {
+ masm.Lsr(output64, output64, rmc.shiftAmount);
+ }
+}
+
+void CodeGenerator::visitUDivConstantI64(LUDivConstantI64* ins) {
+ uint64_t d = ins->denominator();
+
+ if (d == 0) {
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc());
+ return;
+ }
+
+ // Compute the truncated division result.
+ UnsignedDivide64WithConstant(masm, ins);
+}
+
void CodeGenerator::visitModI(LModI* ins) {
Register lhs = ToRegister(ins->lhs());
Register rhs = ToRegister(ins->rhs());
@@ -787,6 +954,54 @@ void CodeGenerator::visitModPowTwoI(LModPowTwoI* ins) {
}
}
+void CodeGenerator::visitModPowTwoI64(LModPowTwoI64* ins) {
+ Register lhs = ToRegister(ins->input());
+ ARMRegister lhs64 = toXRegister(ins->input());
+ ARMRegister out64 = toXRegister(ins->output());
+
+ int32_t shift = ins->shift();
+ bool canBeNegative =
+ !ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend();
+
+ if (shift == 0) {
+ masm.Mov(out64, xzr);
+ return;
+ }
+
+ auto clearHighBits = [&](ARMRegister reg) {
+ switch (shift) {
+ case 32:
+ masm.Mov(out64.W(), reg.W());
+ break;
+ default:
+ masm.And(out64, reg, Operand((uint64_t(1) << shift) - 1));
+ break;
+ }
+ };
+
+ Label negative;
+ if (canBeNegative) {
+ // Switch based on sign of the lhs.
+ // Positive numbers are just a bitmask.
+ masm.branchTestPtr(Assembler::Signed, lhs, lhs, &negative);
+ }
+
+ clearHighBits(lhs64);
+
+ if (canBeNegative) {
+ Label done;
+ masm.jump(&done);
+
+ // Negative numbers need a negate, bitmask, negate.
+ masm.bind(&negative);
+ masm.Neg(out64, Operand(lhs64));
+ clearHighBits(out64);
+ masm.Neg(out64, Operand(out64));
+
+ masm.bind(&done);
+ }
+}
+
void CodeGenerator::visitModConstantI(LModConstantI* ins) {
Register lhs = ToRegister(ins->numerator());
ARMRegister lhs32 = toWRegister(ins->numerator());
@@ -866,6 +1081,54 @@ void CodeGenerator::visitUModConstant(LUModConstant* ins) {
}
}
+void CodeGenerator::visitModConstantI64(LModConstantI64* ins) {
+ ARMRegister lhs64 = toXRegister(ins->numerator());
+ ARMRegister output64 = toXRegister(ins->output());
+
+ int64_t d = ins->denominator();
+
+ if (d == 0) {
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc());
+ return;
+ }
+
+ // Compute the truncated division result in output64.
+ Divide64WithConstant(masm, ins);
+
+ // Compute the remainder: output = lhs - (output * rhs).
+ {
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ ARMRegister rhs64 = temps.AcquireX();
+
+ masm.Mov(rhs64, d);
+ masm.Msub(output64, output64, rhs64, lhs64);
+ }
+}
+
+void CodeGenerator::visitUModConstantI64(LUModConstantI64* ins) {
+ ARMRegister lhs64 = toXRegister(ins->numerator());
+ ARMRegister output64 = toXRegister(ins->output());
+
+ uint64_t d = ins->denominator();
+
+ if (d == 0) {
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc());
+ return;
+ }
+
+ // Compute the truncated division result in output64.
+ UnsignedDivide64WithConstant(masm, ins);
+
+ // Compute the remainder: output = lhs - (output * rhs).
+ {
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ ARMRegister rhs64 = temps.AcquireX();
+
+ masm.Mov(rhs64, d);
+ masm.Msub(output64, output64, rhs64, lhs64);
+ }
+}
+
void CodeGeneratorARM64::emitBigIntPtrDiv(LBigIntPtrDiv* ins, Register dividend,
Register divisor, Register output) {
// Callers handle division by zero and integer overflow.
diff --git a/js/src/jit/arm64/Lowering-arm64.cpp b/js/src/jit/arm64/Lowering-arm64.cpp
@@ -294,27 +294,98 @@ void LIRGeneratorARM64::lowerModI(MMod* mod) {
}
void LIRGeneratorARM64::lowerDivI64(MDiv* div) {
+ if (div->rhs()->isConstant()) {
+ LAllocation lhs = useRegister(div->lhs());
+ int64_t rhs = div->rhs()->toConstant()->toInt64();
+
+ if (mozilla::IsPowerOfTwo(mozilla::Abs(rhs))) {
+ int32_t shift = mozilla::FloorLog2(mozilla::Abs(rhs));
+
+ auto* lir = new (alloc()) LDivPowTwoI64(lhs, shift, rhs < 0);
+ define(lir, div);
+ return;
+ }
+
+ auto* lir = new (alloc()) LDivConstantI64(lhs, rhs);
+ define(lir, div);
+ return;
+ }
+
auto* lir = new (alloc())
LDivI64(useRegisterAtStart(div->lhs()), useRegisterAtStart(div->rhs()));
- defineInt64(lir, div);
+ define(lir, div);
}
void LIRGeneratorARM64::lowerModI64(MMod* mod) {
- auto* lir =
- new (alloc()) LModI64(useRegister(mod->lhs()), useRegister(mod->rhs()));
- defineInt64(lir, mod);
+ LAllocation lhs = useRegister(mod->lhs());
+
+ if (mod->rhs()->isConstant()) {
+ int64_t rhs = mod->rhs()->toConstant()->toInt64();
+
+ if (mozilla::IsPowerOfTwo(mozilla::Abs(rhs))) {
+ int32_t shift = mozilla::FloorLog2(mozilla::Abs(rhs));
+
+ auto* lir = new (alloc()) LModPowTwoI64(lhs, shift);
+ define(lir, mod);
+ return;
+ }
+
+ auto* lir = new (alloc()) LModConstantI64(lhs, rhs);
+ define(lir, mod);
+ return;
+ }
+
+ auto* lir = new (alloc()) LModI64(lhs, useRegister(mod->rhs()));
+ define(lir, mod);
}
void LIRGeneratorARM64::lowerUDivI64(MDiv* div) {
+ if (div->rhs()->isConstant()) {
+ LAllocation lhs = useRegister(div->lhs());
+
+ // NOTE: the result of toInt64 is coerced to uint64_t.
+ uint64_t rhs = div->rhs()->toConstant()->toInt64();
+
+ if (mozilla::IsPowerOfTwo(rhs)) {
+ int32_t shift = mozilla::FloorLog2(rhs);
+
+ auto* lir = new (alloc()) LDivPowTwoI64(lhs, shift, false);
+ define(lir, div);
+ return;
+ }
+
+ auto* lir = new (alloc()) LUDivConstantI64(lhs, rhs);
+ define(lir, div);
+ return;
+ }
+
auto* lir = new (alloc())
LUDivI64(useRegisterAtStart(div->lhs()), useRegisterAtStart(div->rhs()));
- defineInt64(lir, div);
+ define(lir, div);
}
void LIRGeneratorARM64::lowerUModI64(MMod* mod) {
- auto* lir =
- new (alloc()) LUModI64(useRegister(mod->lhs()), useRegister(mod->rhs()));
- defineInt64(lir, mod);
+ LAllocation lhs = useRegister(mod->lhs());
+
+ if (mod->rhs()->isConstant()) {
+ // NOTE: the result of toInt64 is coerced to uint64_t.
+ uint64_t rhs = mod->rhs()->toConstant()->toInt64();
+
+ if (mozilla::IsPowerOfTwo(rhs)) {
+ int32_t shift = mozilla::FloorLog2(rhs);
+
+ auto* lir = new (alloc()) LModPowTwoI64(lhs, shift);
+ define(lir, mod);
+ return;
+ }
+
+ auto* lir = new (alloc()) LUModConstantI64(lhs, rhs);
+ define(lir, mod);
+ return;
+ }
+
+ auto* lir = new (alloc()) LUModI64(lhs, useRegister(mod->rhs()));
+ define(lir, mod);
}
void LIRGeneratorARM64::lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div) {