tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 84f5d2f54b355cf304b9cf93a876abd4b74310b7
parent 3ffb5536c10d97a20741ffa67273d8107028e8cf
Author: André Bargull <andre.bargull@gmail.com>
Date:   Sat, 15 Nov 2025 10:15:24 +0000

Bug 1998457 - Part 3: Optimise int64 divison and modulus with constants in x64. r=spidermonkey-reviewers,iain

These LIR instructions match their 32-bit counterparts in x86-shared.

Differential Revision: https://phabricator.services.mozilla.com/D271438

Diffstat:
Mjs/src/jit/LIROps.yaml | 54++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mjs/src/jit/x64/CodeGenerator-x64.cpp | 294+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mjs/src/jit/x64/Lowering-x64.cpp | 110++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
3 files changed, 446 insertions(+), 12 deletions(-)

diff --git a/js/src/jit/LIROps.yaml b/js/src/jit/LIROps.yaml @@ -4322,6 +4322,60 @@ num_temps: 1 mir_op: Mod +- name: DivPowTwoI64 + result_type: WordSized + operands: + numerator: WordSized + numeratorCopy: WordSized + arguments: + shift: int32_t + negativeDivisor: bool + mir_op: Div + +- name: ModPowTwoI64 + result_type: WordSized + operands: + input: WordSized + arguments: + shift: int32_t + mir_op: Mod + +- name: DivConstantI64 + result_type: WordSized + operands: + numerator: WordSized + arguments: + denominator: int64_t + num_temps: 1 + mir_op: Div + +- name: ModConstantI64 + result_type: WordSized + operands: + numerator: WordSized + arguments: + denominator: int64_t + num_temps: 1 + mir_op: Mod + +- name: UDivConstantI64 + result_type: WordSized + operands: + numerator: WordSized + arguments: + denominator: uint64_t + num_temps: 1 + mir_op: Div + +- name: UModConstantI64 + result_type: WordSized + operands: + numerator: WordSized + arguments: + denominator: uint64_t + num_temps: 1 + mir_op: Mod + - name: WasmTruncateToInt64 result_type: Int64 operands: diff --git a/js/src/jit/x64/CodeGenerator-x64.cpp b/js/src/jit/x64/CodeGenerator-x64.cpp @@ -12,6 +12,7 @@ #include "jit/CodeGenerator.h" #include "jit/MIR-wasm.h" #include "jit/MIR.h" +#include "jit/ReciprocalMulConstants.h" #include "js/ScalarType.h" // js::Scalar::Type #include "jit/MacroAssembler-inl.h" @@ -308,6 +309,299 @@ void CodeGenerator::visitUModI64(LUModI64* lir) { masm.udivq(rhs); } +void CodeGenerator::visitDivPowTwoI64(LDivPowTwoI64* ins) { + Register lhs = ToRegister(ins->numerator()); + + int32_t shift = ins->shift(); + bool negativeDivisor = ins->negativeDivisor(); + MDiv* mir = ins->mir(); + + // We use defineReuseInput so these should always be the same, which is + // convenient since all of our instructions here are two-address. + MOZ_ASSERT(lhs == ToRegister(ins->output())); + + // Unsigned division is just a right-shift. + if (mir->isUnsigned()) { + if (shift != 0) { + masm.shrq(Imm32(shift), lhs); + } + return; + } + + if (shift != 0) { + // Adjust the value so that shifting produces a correctly rounded result + // when the numerator is negative. + // See 10-1 "Signed Division by a Known Power of 2" in Henry S. Warren, + // Jr.'s Hacker's Delight. + if (mir->canBeNegativeDividend()) { + Register lhsCopy = ToRegister(ins->numeratorCopy()); + MOZ_ASSERT(lhsCopy != lhs); + if (shift > 1) { + // Copy the sign bit of the numerator. (= (2^63 - 1) or 0) + masm.sarq(Imm32(63), lhs); + } + // Divide by 2^(64 - shift) + // i.e. (= (2^64 - 1) / 2^(64 - shift) or 0) + // i.e. (= (2^shift - 1) or 0) + masm.shrq(Imm32(64 - shift), lhs); + // If signed, make any 1 bit below the shifted bits to bubble up, such + // that once shifted the value would be rounded towards 0. + masm.addq(lhsCopy, lhs); + } + masm.sarq(Imm32(shift), lhs); + } + + if (negativeDivisor) { + masm.negq(lhs); + } + + if (shift == 0 && negativeDivisor) { + // INT64_MIN / -1 overflows. + Label ok; + masm.j(Assembler::NoOverflow, &ok); + masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->trapSiteDesc()); + masm.bind(&ok); + } +} + +void CodeGenerator::visitModPowTwoI64(LModPowTwoI64* ins) { + Register64 lhs = Register64(ToRegister(ins->input())); + int32_t shift = ins->shift(); + bool canBeNegative = + !ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend(); + + MOZ_ASSERT(lhs.reg == ToRegister(ins->output())); + + if (shift == 0) { + masm.xorl(lhs.reg, lhs.reg); + return; + } + + auto clearHighBits = [&]() { + switch (shift) { + case 8: + masm.movzbl(lhs.reg, lhs.reg); + break; + case 16: + masm.movzwl(lhs.reg, lhs.reg); + break; + case 32: + masm.movl(lhs.reg, lhs.reg); + break; + default: + masm.and64(Imm64((uint64_t(1) << shift) - 1), lhs); + break; + } + }; + + Label negative; + + if (canBeNegative) { + // Switch based on sign of the lhs. + // Positive numbers are just a bitmask + masm.branchTest64(Assembler::Signed, lhs, lhs, &negative); + } + + clearHighBits(); + + if (canBeNegative) { + Label done; + masm.jump(&done); + + // Negative numbers need a negate, bitmask, negate + masm.bind(&negative); + + // Unlike in the visitModI64 case, we are not computing the mod by means of + // a division. Therefore, the divisor = -1 case isn't problematic (the andq + // always returns 0, which is what we expect). + // + // The negq instruction overflows if lhs == INT64_MIN, but this is also not + // a problem: shift is at most 63, and so the andq also always returns 0. + masm.neg64(lhs); + clearHighBits(); + masm.neg64(lhs); + + masm.bind(&done); + } +} + +template <class LDivOrMod> +static void Divide64WithConstant(MacroAssembler& masm, LDivOrMod* ins) { + Register lhs = ToRegister(ins->numerator()); + [[maybe_unused]] Register output = ToRegister(ins->output()); + [[maybe_unused]] Register temp = ToRegister(ins->temp0()); + int64_t d = ins->denominator(); + + MOZ_ASSERT(lhs != rax && lhs != rdx); + MOZ_ASSERT((output == rax && temp == rdx) || (output == rdx && temp == rax)); + + // The absolute value of the denominator isn't a power of 2 (see LDivPowTwoI64 + // and LModPowTwoI64). + MOZ_ASSERT(!mozilla::IsPowerOfTwo(mozilla::Abs(d))); + + auto* mir = ins->mir(); + + // We will first divide by Abs(d), and negate the answer if d is negative. + // If desired, this can be avoided by generalizing computeDivisionConstants. + auto rmc = ReciprocalMulConstants::computeSignedDivisionConstants(d); + + // We first compute (M * n) >> 64, where M = rmc.multiplier. + masm.movq(ImmWord(uint64_t(rmc.multiplier)), rax); + masm.imulq(lhs); + if (rmc.multiplier > Int128(INT64_MAX)) { + MOZ_ASSERT(rmc.multiplier < (Int128(1) << 64)); + + // We actually computed rdx = ((int64_t(M) * n) >> 64) instead. Since + // (M * n) >> 64 is the same as (rdx + n), we can correct for the overflow. + // (rdx + n) can't overflow, as n and rdx have opposite signs because + // int64_t(M) is negative. + masm.addq(lhs, rdx); + } + // (M * n) >> (64 + shift) is the truncated division answer if n is + // non-negative, as proved in the comments of computeDivisionConstants. We + // must add 1 later if n is negative to get the right answer in all cases. + if (rmc.shiftAmount > 0) { + masm.sarq(Imm32(rmc.shiftAmount), rdx); + } + + // We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be + // computed with just a sign-extending shift of 63 bits. + if (mir->canBeNegativeDividend()) { + masm.movq(lhs, rax); + masm.sarq(Imm32(63), rax); + masm.subq(rax, rdx); + } + + // After this, rdx contains the correct truncated division result. + if (d < 0) { + masm.negq(rdx); + } +} + +void CodeGenerator::visitDivConstantI64(LDivConstantI64* ins) { + int32_t d = ins->denominator(); + + // This emits the division answer into rdx. + MOZ_ASSERT(ToRegister(ins->output()) == rdx); + MOZ_ASSERT(ToRegister(ins->temp0()) == rax); + + if (d == 0) { + masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc()); + return; + } + + // Compute the truncated division result in rdx. + Divide64WithConstant(masm, ins); +} + +void CodeGenerator::visitModConstantI64(LModConstantI64* ins) { + Register lhs = ToRegister(ins->numerator()); + int64_t d = ins->denominator(); + + // This emits the modulus answer into rax. + MOZ_ASSERT(ToRegister(ins->output()) == rax); + MOZ_ASSERT(ToRegister(ins->temp0()) == rdx); + + if (d == 0) { + masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc()); + return; + } + + // Compute the truncated division result in rdx. + Divide64WithConstant(masm, ins); + + // Compute the remainder in |rax|: rax = lhs - d * rdx + masm.mul64(Imm64(d), Register64(rdx)); + masm.movq(lhs, rax); + masm.subq(rdx, rax); +} + +template <class LUDivOrUMod> +static void UnsignedDivide64WithConstant(MacroAssembler& masm, + LUDivOrUMod* ins) { + Register lhs = ToRegister(ins->numerator()); + [[maybe_unused]] Register output = ToRegister(ins->output()); + [[maybe_unused]] Register temp = ToRegister(ins->temp0()); + uint64_t d = ins->denominator(); + + MOZ_ASSERT(lhs != rax && lhs != rdx); + MOZ_ASSERT((output == rax && temp == rdx) || (output == rdx && temp == rax)); + + // The denominator isn't a power of 2 (see LDivPowTwoI and LModPowTwoI). + MOZ_ASSERT(!mozilla::IsPowerOfTwo(d)); + + auto rmc = ReciprocalMulConstants::computeUnsignedDivisionConstants(d); + + // We first compute (M * n) >> 64, where M = rmc.multiplier. + masm.movq(ImmWord(uint64_t(rmc.multiplier)), rax); + masm.umulq(lhs); + if (rmc.multiplier > Int128(UINT64_MAX)) { + // M >= 2^64 and shift == 0 is impossible, as d >= 2 implies that + // ((M * n) >> (64 + shift)) >= n > floor(n/d) whenever n >= d, + // contradicting the proof of correctness in computeDivisionConstants. + MOZ_ASSERT(rmc.shiftAmount > 0); + MOZ_ASSERT(rmc.multiplier < (Int128(1) << 65)); + + // We actually computed rdx = ((uint64_t(M) * n) >> 64) instead. Since + // (M * n) >> (64 + shift) is the same as (rdx + n) >> shift, we can correct + // for the overflow. This case is a bit trickier than the signed case, + // though, as the (rdx + n) addition itself can overflow; however, note that + // (rdx + n) >> shift == (((n - rdx) >> 1) + rdx) >> (shift - 1), + // which is overflow-free. See Hacker's Delight, section 10-8 for details. + + // Compute (n - rdx) >> 1 into temp. + masm.movq(lhs, rax); + masm.subq(rdx, rax); + masm.shrq(Imm32(1), rax); + + // Finish the computation. + masm.addq(rax, rdx); + masm.shrq(Imm32(rmc.shiftAmount - 1), rdx); + } else { + if (rmc.shiftAmount > 0) { + masm.shrq(Imm32(rmc.shiftAmount), rdx); + } + } +} + +void CodeGenerator::visitUDivConstantI64(LUDivConstantI64* ins) { + uint64_t d = ins->denominator(); + + // This emits the division answer into rdx. + MOZ_ASSERT(ToRegister(ins->output()) == rdx); + MOZ_ASSERT(ToRegister(ins->temp0()) == rax); + + if (d == 0) { + masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc()); + return; + } + + // Compute the truncated division result in rdx. + UnsignedDivide64WithConstant(masm, ins); +} + +void CodeGenerator::visitUModConstantI64(LUModConstantI64* ins) { + Register lhs = ToRegister(ins->numerator()); + uint64_t d = ins->denominator(); + + // This emits the modulus answer into rax. + MOZ_ASSERT(ToRegister(ins->output()) == rax); + MOZ_ASSERT(ToRegister(ins->temp0()) == rdx); + + if (d == 0) { + masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc()); + return; + } + + // Compute the truncated division result in rdx. + UnsignedDivide64WithConstant(masm, ins); + + // Compute the remainder in |rax|: rax = lhs - d * rdx + masm.mul64(Imm64(d), Register64(rdx)); + masm.movq(lhs, rax); + masm.subq(rdx, rax); +} + void CodeGeneratorX64::emitBigIntPtrDiv(LBigIntPtrDiv* ins, Register dividend, Register divisor, Register output) { // Callers handle division by zero and integer overflow. diff --git a/js/src/jit/x64/Lowering-x64.cpp b/js/src/jit/x64/Lowering-x64.cpp @@ -7,6 +7,7 @@ #include "jit/x64/Lowering-x64.h" #include "mozilla/CheckedInt.h" +#include "mozilla/MathAlgorithms.h" #include "jit/Lowering.h" #include "jit/MIR-wasm.h" @@ -526,35 +527,120 @@ void LIRGenerator::visitSubstr(MSubstr* ins) { } void LIRGeneratorX64::lowerDivI64(MDiv* div) { + // Division instructions are slow. Division by constant denominators can be + // rewritten to use other instructions. + if (div->rhs()->isConstant()) { + int64_t rhs = div->rhs()->toConstant()->toInt64(); + + // Division by powers of two can be done by shifting, and division by + // other numbers can be done by a reciprocal multiplication technique. + if (mozilla::IsPowerOfTwo(mozilla::Abs(rhs))) { + int32_t shift = mozilla::FloorLog2(mozilla::Abs(rhs)); + LAllocation lhs = useRegisterAtStart(div->lhs()); + + // We have to round the result toward 0 when the remainder is non-zero. + // This requires an extra register to round up/down when the left-hand + // side is signed. + LAllocation lhsCopy = div->canBeNegativeDividend() + ? useRegister(div->lhs()) + : LAllocation(); + + auto* lir = new (alloc()) LDivPowTwoI64(lhs, lhsCopy, shift, rhs < 0); + defineReuseInput(lir, div, 0); + return; + } + + auto* lir = new (alloc()) + LDivConstantI64(useRegister(div->lhs()), tempFixed(rax), rhs); + defineFixed(lir, div, LAllocation(AnyRegister(rdx))); + return; + } + auto* lir = new (alloc()) LDivI64(useFixedAtStart(div->lhs(), rax), useRegister(div->rhs()), tempFixed(rdx)); - defineInt64Fixed(lir, div, LInt64Allocation(LAllocation(AnyRegister(rax)))); -} - -void LIRGeneratorX64::lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div) { - MOZ_CRASH("We don't use runtime div for this architecture"); + defineFixed(lir, div, LAllocation(AnyRegister(rax))); } void LIRGeneratorX64::lowerModI64(MMod* mod) { + if (mod->rhs()->isConstant()) { + int64_t rhs = mod->rhs()->toConstant()->toInt64(); + + if (mozilla::IsPowerOfTwo(mozilla::Abs(rhs))) { + int32_t shift = mozilla::FloorLog2(mozilla::Abs(rhs)); + + auto* lir = + new (alloc()) LModPowTwoI64(useRegisterAtStart(mod->lhs()), shift); + defineReuseInput(lir, mod, 0); + return; + } + + auto* lir = new (alloc()) + LModConstantI64(useRegister(mod->lhs()), tempFixed(rdx), rhs); + defineFixed(lir, mod, LAllocation(AnyRegister(rax))); + return; + } + auto* lir = new (alloc()) LModI64(useFixedAtStart(mod->lhs(), rax), useRegister(mod->rhs()), tempFixed(rax)); - defineInt64Fixed(lir, mod, LInt64Allocation(LAllocation(AnyRegister(rdx)))); -} - -void LIRGeneratorX64::lowerWasmBuiltinModI64(MWasmBuiltinModI64* mod) { - MOZ_CRASH("We don't use runtime mod for this architecture"); + defineFixed(lir, mod, LAllocation(AnyRegister(rdx))); } void LIRGeneratorX64::lowerUDivI64(MDiv* div) { + if (div->rhs()->isConstant()) { + // NOTE: the result of toInt64 is coerced to uint64_t. + uint64_t rhs = div->rhs()->toConstant()->toInt64(); + + if (mozilla::IsPowerOfTwo(rhs)) { + int32_t shift = mozilla::FloorLog2(rhs); + + auto* lir = new (alloc()) LDivPowTwoI64(useRegisterAtStart(div->lhs()), + LAllocation(), shift, false); + defineReuseInput(lir, div, 0); + return; + } + + auto* lir = new (alloc()) + LUDivConstantI64(useRegister(div->lhs()), tempFixed(rax), rhs); + defineFixed(lir, div, LAllocation(AnyRegister(rdx))); + return; + } + auto* lir = new (alloc()) LUDivI64(useFixedAtStart(div->lhs(), rax), useRegister(div->rhs()), tempFixed(rdx)); - defineInt64Fixed(lir, div, LInt64Allocation(LAllocation(AnyRegister(rax)))); + defineFixed(lir, div, LAllocation(AnyRegister(rax))); } void LIRGeneratorX64::lowerUModI64(MMod* mod) { + if (mod->rhs()->isConstant()) { + // NOTE: the result of toInt64 is coerced to uint64_t. + uint64_t rhs = mod->rhs()->toConstant()->toInt64(); + + if (mozilla::IsPowerOfTwo(rhs)) { + int32_t shift = mozilla::FloorLog2(rhs); + + auto* lir = + new (alloc()) LModPowTwoI64(useRegisterAtStart(mod->lhs()), shift); + defineReuseInput(lir, mod, 0); + return; + } + + auto* lir = new (alloc()) + LUModConstantI64(useRegister(mod->lhs()), tempFixed(rdx), rhs); + defineFixed(lir, mod, LAllocation(AnyRegister(rax))); + return; + } + auto* lir = new (alloc()) LUModI64(useFixedAtStart(mod->lhs(), rax), useRegister(mod->rhs()), tempFixed(rax)); - defineInt64Fixed(lir, mod, LInt64Allocation(LAllocation(AnyRegister(rdx)))); + defineFixed(lir, mod, LAllocation(AnyRegister(rdx))); +} + +void LIRGeneratorX64::lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div) { + MOZ_CRASH("We don't use runtime div for this architecture"); +} + +void LIRGeneratorX64::lowerWasmBuiltinModI64(MWasmBuiltinModI64* mod) { + MOZ_CRASH("We don't use runtime mod for this architecture"); } void LIRGeneratorX64::lowerBigIntPtrDiv(MBigIntPtrDiv* ins) {