tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 646effac1b0c6a423966751b0f086a0b154c20a4
parent 84f5d2f54b355cf304b9cf93a876abd4b74310b7
Author: André Bargull <andre.bargull@gmail.com>
Date:   Sat, 15 Nov 2025 10:15:24 +0000

Bug 1998457 - Part 4: Optimise int64 division and modulus with constants in arm64. r=spidermonkey-reviewers,iain

Differential Revision: https://phabricator.services.mozilla.com/D271439

Diffstat:
M  js/src/jit/LIROps.yaml                   |  57 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
M  js/src/jit/arm64/CodeGenerator-arm64.cpp | 267 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M  js/src/jit/arm64/Lowering-arm64.cpp      |  87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
3 files changed, 397 insertions(+), 14 deletions(-)

diff --git a/js/src/jit/LIROps.yaml b/js/src/jit/LIROps.yaml @@ -4520,28 +4520,28 @@ #ifdef JS_CODEGEN_ARM64 - name: DivI64 - result_type: Int64 + result_type: WordSized operands: lhs: WordSized rhs: WordSized mir_op: Div - name: ModI64 - result_type: Int64 + result_type: WordSized operands: lhs: WordSized rhs: WordSized mir_op: Mod - name: UDivI64 - result_type: Int64 + result_type: WordSized operands: lhs: WordSized rhs: WordSized mir_op: Div - name: UModI64 - result_type: Int64 + result_type: WordSized operands: lhs: WordSized rhs: WordSized @@ -4579,6 +4579,55 @@ denominator: uint32_t mir_op: Mod +- name: DivPowTwoI64 + result_type: WordSized + operands: + numerator: WordSized + arguments: + shift: int32_t + negativeDivisor: bool + mir_op: Div + +- name: ModPowTwoI64 + result_type: WordSized + operands: + input: WordSized + arguments: + shift: int32_t + mir_op: Mod + +- name: DivConstantI64 + result_type: WordSized + operands: + numerator: WordSized + arguments: + denominator: int64_t + mir_op: Div + +- name: ModConstantI64 + result_type: WordSized + operands: + numerator: WordSized + arguments: + denominator: int64_t + mir_op: Mod + +- name: UDivConstantI64 + result_type: WordSized + operands: + numerator: WordSized + arguments: + denominator: uint64_t + mir_op: Div + +- name: UModConstantI64 + result_type: WordSized + operands: + numerator: WordSized + arguments: + denominator: uint64_t + mir_op: Mod + - name: UDiv result_type: WordSized operands: diff --git a/js/src/jit/arm64/CodeGenerator-arm64.cpp b/js/src/jit/arm64/CodeGenerator-arm64.cpp @@ -506,6 +506,61 @@ void CodeGenerator::visitDivPowTwoI(LDivPowTwoI* ins) { } } +void CodeGenerator::visitDivPowTwoI64(LDivPowTwoI64* ins) { + ARMRegister numerator64 = toXRegister(ins->numerator()); + ARMRegister output64 = toXRegister(ins->output()); + + int32_t shift = ins->shift(); + bool negativeDivisor = ins->negativeDivisor(); + MDiv* mir = ins->mir(); + + if (shift) { + if (mir->isUnsigned()) { + // shift right 
+ masm.Lsr(output64, numerator64, shift); + } else { + ARMRegister temp64 = numerator64; + // Adjust the value so that shifting produces a correctly + // rounded result when the numerator is negative. See 10-1 + // "Signed Division by a Known Power of 2" in Henry + // S. Warren, Jr.'s Hacker's Delight. + if (mir->canBeNegativeDividend()) { + if (shift > 1) { + // Copy the sign bit of the numerator. (= (2^64 - 1) or 0) + masm.Asr(output64, numerator64, 63); + temp64 = output64; + } + // Divide by 2^(64 - shift) + // i.e. (= (2^64 - 1) / 2^(64 - shift) or 0) + // i.e. (= (2^shift - 1) or 0) + masm.Lsr(output64, temp64, 64 - shift); + // If signed, make any 1 bit below the shifted bits to bubble up, such + // that once shifted the value would be rounded towards 0. + masm.Add(output64, output64, numerator64); + temp64 = output64; + } + masm.Asr(output64, temp64, shift); + + if (negativeDivisor) { + masm.Neg(output64, output64); + } + } + return; + } + + if (negativeDivisor) { + // INT64_MIN / -1 overflows. + Label ok; + masm.Negs(output64, numerator64); + masm.branch(Assembler::NoOverflow, &ok); + masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->trapSiteDesc()); + masm.bind(&ok); + } else { + // Copy the result. + masm.Mov(output64, numerator64); + } +} + template <class LDivOrMod> static void DivideWithConstant(MacroAssembler& masm, LDivOrMod* ins) { ARMRegister lhs32 = toWRegister(ins->numerator()); @@ -553,8 +608,7 @@ static void DivideWithConstant(MacroAssembler& masm, LDivOrMod* ins) { // We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be // computed with just a sign-extending shift of 31 bits. if (mir->canBeNegativeDividend()) { - masm.Asr(const32, lhs32, 31); - masm.Sub(output32, output32, const32); + masm.Sub(output32, output32, Operand(lhs32, vixl::ASR, 31)); } // After this, output32 contains the correct truncated division result. 
@@ -700,6 +754,119 @@ void CodeGenerator::visitUDivConstant(LUDivConstant* ins) { } } +template <class LDivOrMod> +static void Divide64WithConstant(MacroAssembler& masm, LDivOrMod* ins) { + ARMRegister lhs64 = toXRegister(ins->numerator()); + ARMRegister output64 = toXRegister(ins->output()); + int64_t d = ins->denominator(); + + vixl::UseScratchRegisterScope temps(&masm.asVIXL()); + ARMRegister const64 = temps.AcquireX(); + + // The absolute value of the denominator isn't a power of 2. + MOZ_ASSERT(!mozilla::IsPowerOfTwo(mozilla::Abs(d))); + + auto* mir = ins->mir(); + + // We will first divide by Abs(d), and negate the answer if d is negative. + // If desired, this can be avoided by generalizing computeDivisionConstants. + auto rmc = ReciprocalMulConstants::computeSignedDivisionConstants(d); + + // We first compute (M * n) >> 64, where M = rmc.multiplier. + masm.Mov(const64, uint64_t(rmc.multiplier)); + masm.Smulh(output64, lhs64, const64); + if (rmc.multiplier > Int128(INT64_MAX)) { + MOZ_ASSERT(rmc.multiplier < (Int128(1) << 64)); + + // We actually computed output = ((int64_t(M) * n) >> 64) instead. Since + // (M * n) >> 64 is the same as (output + n), we can correct for the + // overflow. (output + n) can't overflow, as n and output have opposite + // signs because int64_t(M) is negative. + masm.Add(output64, output64, lhs64); + } + + // (M * n) >> (64 + shift) is the truncated division answer if n is + // non-negative, as proved in the comments of computeDivisionConstants. We + // must add 1 later if n is negative to get the right answer in all cases. + masm.Asr(output64, output64, rmc.shiftAmount); + + // We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be + // computed with just a sign-extending shift of 63 bits. + if (mir->canBeNegativeDividend()) { + masm.Sub(output64, output64, Operand(lhs64, vixl::ASR, 63)); + } + + // After this, output64 contains the correct truncated division result. 
+ if (d < 0) { + masm.Neg(output64, output64); + } +} + +void CodeGenerator::visitDivConstantI64(LDivConstantI64* ins) { + int64_t d = ins->denominator(); + + if (d == 0) { + masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc()); + return; + } + + // Compute the truncated division result. + Divide64WithConstant(masm, ins); +} + +template <class LUDivOrUMod> +static void UnsignedDivide64WithConstant(MacroAssembler& masm, + LUDivOrUMod* ins) { + ARMRegister lhs64 = toXRegister(ins->numerator()); + ARMRegister output64 = toXRegister(ins->output()); + uint64_t d = ins->denominator(); + + vixl::UseScratchRegisterScope temps(&masm.asVIXL()); + ARMRegister const64 = temps.AcquireX(); + + // The denominator isn't a power of 2 (see LDivPowTwoI). + MOZ_ASSERT(!mozilla::IsPowerOfTwo(d)); + + auto rmc = ReciprocalMulConstants::computeUnsignedDivisionConstants(d); + + // We first compute (M * n) >> 64, where M = rmc.multiplier. + masm.Mov(const64, uint64_t(rmc.multiplier)); + masm.Umulh(output64, lhs64, const64); + if (rmc.multiplier > Int128(UINT64_MAX)) { + // M >= 2^64 and shift == 0 is impossible, as d >= 2 implies that + // ((M * n) >> (64 + shift)) >= n > floor(n/d) whenever n >= d, + // contradicting the proof of correctness in computeDivisionConstants. + MOZ_ASSERT(rmc.shiftAmount > 0); + MOZ_ASSERT(rmc.multiplier < (Int128(1) << 65)); + + // We actually computed output = ((uint64_t(M) * n) >> 64) instead. Since + // (M * n) >> (64 + shift) is the same as (output + n) >> shift, we can + // correct for the overflow. This case is a bit trickier than the signed + // case, though, as the (output + n) addition itself can overflow; however, + // note that + // (output + n) >> shift == (((n - output) >> 1) + output) >> (shift - 1), + // which is overflow-free. See Hacker's Delight, section 10-8 for details. 
+ + masm.Sub(const64, lhs64, output64); + masm.Add(output64, output64, Operand(const64, vixl::LSR, 1)); + masm.Lsr(output64, output64, rmc.shiftAmount - 1); + } else { + masm.Lsr(output64, output64, rmc.shiftAmount); + } +} + +void CodeGenerator::visitUDivConstantI64(LUDivConstantI64* ins) { + uint64_t d = ins->denominator(); + + if (d == 0) { + masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc()); + return; + } + + // Compute the truncated division result. + UnsignedDivide64WithConstant(masm, ins); +} + void CodeGenerator::visitModI(LModI* ins) { Register lhs = ToRegister(ins->lhs()); Register rhs = ToRegister(ins->rhs()); @@ -787,6 +954,54 @@ void CodeGenerator::visitModPowTwoI(LModPowTwoI* ins) { } } +void CodeGenerator::visitModPowTwoI64(LModPowTwoI64* ins) { + Register lhs = ToRegister(ins->input()); + ARMRegister lhs64 = toXRegister(ins->input()); + ARMRegister out64 = toXRegister(ins->output()); + + int32_t shift = ins->shift(); + bool canBeNegative = + !ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend(); + + if (shift == 0) { + masm.Mov(out64, xzr); + return; + } + + auto clearHighBits = [&](ARMRegister reg) { + switch (shift) { + case 32: + masm.Mov(out64.W(), reg.W()); + break; + default: + masm.And(out64, reg, Operand((uint64_t(1) << shift) - 1)); + break; + } + }; + + Label negative; + if (canBeNegative) { + // Switch based on sign of the lhs. + // Positive numbers are just a bitmask. + masm.branchTestPtr(Assembler::Signed, lhs, lhs, &negative); + } + + clearHighBits(lhs64); + + if (canBeNegative) { + Label done; + masm.jump(&done); + + // Negative numbers need a negate, bitmask, negate. 
+ masm.bind(&negative); + masm.Neg(out64, Operand(lhs64)); + clearHighBits(out64); + masm.Neg(out64, Operand(out64)); + + masm.bind(&done); + } +} + void CodeGenerator::visitModConstantI(LModConstantI* ins) { Register lhs = ToRegister(ins->numerator()); ARMRegister lhs32 = toWRegister(ins->numerator()); @@ -866,6 +1081,54 @@ void CodeGenerator::visitUModConstant(LUModConstant* ins) { } } +void CodeGenerator::visitModConstantI64(LModConstantI64* ins) { + ARMRegister lhs64 = toXRegister(ins->numerator()); + ARMRegister output64 = toXRegister(ins->output()); + + int64_t d = ins->denominator(); + + if (d == 0) { + masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc()); + return; + } + + // Compute the truncated division result in output64. + Divide64WithConstant(masm, ins); + + // Compute the remainder: output = lhs - (output * rhs). + { + vixl::UseScratchRegisterScope temps(&masm.asVIXL()); + ARMRegister rhs64 = temps.AcquireX(); + + masm.Mov(rhs64, d); + masm.Msub(output64, output64, rhs64, lhs64); + } +} + +void CodeGenerator::visitUModConstantI64(LUModConstantI64* ins) { + ARMRegister lhs64 = toXRegister(ins->numerator()); + ARMRegister output64 = toXRegister(ins->output()); + + uint64_t d = ins->denominator(); + + if (d == 0) { + masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc()); + return; + } + + // Compute the truncated division result in output64. + UnsignedDivide64WithConstant(masm, ins); + + // Compute the remainder: output = lhs - (output * rhs). + { + vixl::UseScratchRegisterScope temps(&masm.asVIXL()); + ARMRegister rhs64 = temps.AcquireX(); + + masm.Mov(rhs64, d); + masm.Msub(output64, output64, rhs64, lhs64); + } +} + void CodeGeneratorARM64::emitBigIntPtrDiv(LBigIntPtrDiv* ins, Register dividend, Register divisor, Register output) { // Callers handle division by zero and integer overflow. 
diff --git a/js/src/jit/arm64/Lowering-arm64.cpp b/js/src/jit/arm64/Lowering-arm64.cpp @@ -294,27 +294,98 @@ void LIRGeneratorARM64::lowerModI(MMod* mod) { } void LIRGeneratorARM64::lowerDivI64(MDiv* div) { + if (div->rhs()->isConstant()) { + LAllocation lhs = useRegister(div->lhs()); + int64_t rhs = div->rhs()->toConstant()->toInt64(); + + if (mozilla::IsPowerOfTwo(mozilla::Abs(rhs))) { + int32_t shift = mozilla::FloorLog2(mozilla::Abs(rhs)); + + auto* lir = new (alloc()) LDivPowTwoI64(lhs, shift, rhs < 0); + define(lir, div); + return; + } + + auto* lir = new (alloc()) LDivConstantI64(lhs, rhs); + define(lir, div); + return; + } + auto* lir = new (alloc()) LDivI64(useRegisterAtStart(div->lhs()), useRegisterAtStart(div->rhs())); - defineInt64(lir, div); + define(lir, div); } void LIRGeneratorARM64::lowerModI64(MMod* mod) { - auto* lir = - new (alloc()) LModI64(useRegister(mod->lhs()), useRegister(mod->rhs())); - defineInt64(lir, mod); + LAllocation lhs = useRegister(mod->lhs()); + + if (mod->rhs()->isConstant()) { + int64_t rhs = mod->rhs()->toConstant()->toInt64(); + + if (mozilla::IsPowerOfTwo(mozilla::Abs(rhs))) { + int32_t shift = mozilla::FloorLog2(mozilla::Abs(rhs)); + + auto* lir = new (alloc()) LModPowTwoI64(lhs, shift); + define(lir, mod); + return; + } + + auto* lir = new (alloc()) LModConstantI64(lhs, rhs); + define(lir, mod); + return; + } + + auto* lir = new (alloc()) LModI64(lhs, useRegister(mod->rhs())); + define(lir, mod); } void LIRGeneratorARM64::lowerUDivI64(MDiv* div) { + if (div->rhs()->isConstant()) { + LAllocation lhs = useRegister(div->lhs()); + + // NOTE: the result of toInt64 is coerced to uint64_t. 
+ uint64_t rhs = div->rhs()->toConstant()->toInt64(); + + if (mozilla::IsPowerOfTwo(rhs)) { + int32_t shift = mozilla::FloorLog2(rhs); + + auto* lir = new (alloc()) LDivPowTwoI64(lhs, shift, false); + define(lir, div); + return; + } + + auto* lir = new (alloc()) LUDivConstantI64(lhs, rhs); + define(lir, div); + return; + } + auto* lir = new (alloc()) LUDivI64(useRegisterAtStart(div->lhs()), useRegisterAtStart(div->rhs())); - defineInt64(lir, div); + define(lir, div); } void LIRGeneratorARM64::lowerUModI64(MMod* mod) { - auto* lir = - new (alloc()) LUModI64(useRegister(mod->lhs()), useRegister(mod->rhs())); - defineInt64(lir, mod); + LAllocation lhs = useRegister(mod->lhs()); + + if (mod->rhs()->isConstant()) { + // NOTE: the result of toInt64 is coerced to uint64_t. + uint64_t rhs = mod->rhs()->toConstant()->toInt64(); + + if (mozilla::IsPowerOfTwo(rhs)) { + int32_t shift = mozilla::FloorLog2(rhs); + + auto* lir = new (alloc()) LModPowTwoI64(lhs, shift); + define(lir, mod); + return; + } + + auto* lir = new (alloc()) LUModConstantI64(lhs, rhs); + define(lir, mod); + return; + } + + auto* lir = new (alloc()) LUModI64(lhs, useRegister(mod->rhs())); + define(lir, mod); } void LIRGeneratorARM64::lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div) {