commit 09e6a576a304ef061aac062e67b5b776653440b6
parent 81a2dd4de1abc2b3fdfb87a2bb509cd7e32142ed
Author: André Bargull <andre.bargull@gmail.com>
Date: Mon, 10 Nov 2025 15:13:43 +0000
Bug 1998457 - Part 3: Optimise int64 division and modulus with constants in x64. r=spidermonkey-reviewers,iain
These LIR instructions match their 32-bit counterparts in x86-shared.
Differential Revision: https://phabricator.services.mozilla.com/D271438
Diffstat:
3 files changed, 446 insertions(+), 12 deletions(-)
diff --git a/js/src/jit/LIROps.yaml b/js/src/jit/LIROps.yaml
@@ -4322,6 +4322,60 @@
num_temps: 1
mir_op: Mod
+- name: DivPowTwoI64
+ result_type: WordSized
+ operands:
+ numerator: WordSized
+ numeratorCopy: WordSized
+ arguments:
+ shift: int32_t
+ negativeDivisor: bool
+ mir_op: Div
+
+- name: ModPowTwoI64
+ result_type: WordSized
+ operands:
+ input: WordSized
+ arguments:
+ shift: int32_t
+ mir_op: Mod
+
+- name: DivConstantI64
+ result_type: WordSized
+ operands:
+ numerator: WordSized
+ arguments:
+ denominator: int64_t
+ num_temps: 1
+ mir_op: Div
+
+- name: ModConstantI64
+ result_type: WordSized
+ operands:
+ numerator: WordSized
+ arguments:
+ denominator: int64_t
+ num_temps: 1
+ mir_op: Mod
+
+- name: UDivConstantI64
+ result_type: WordSized
+ operands:
+ numerator: WordSized
+ arguments:
+ denominator: uint64_t
+ num_temps: 1
+ mir_op: Div
+
+- name: UModConstantI64
+ result_type: WordSized
+ operands:
+ numerator: WordSized
+ arguments:
+ denominator: uint64_t
+ num_temps: 1
+ mir_op: Mod
+
- name: WasmTruncateToInt64
result_type: Int64
operands:
diff --git a/js/src/jit/x64/CodeGenerator-x64.cpp b/js/src/jit/x64/CodeGenerator-x64.cpp
@@ -12,6 +12,7 @@
#include "jit/CodeGenerator.h"
#include "jit/MIR-wasm.h"
#include "jit/MIR.h"
+#include "jit/ReciprocalMulConstants.h"
#include "js/ScalarType.h" // js::Scalar::Type
#include "jit/MacroAssembler-inl.h"
@@ -308,6 +309,299 @@ void CodeGenerator::visitUModI64(LUModI64* lir) {
masm.udivq(rhs);
}
+// Emit an int64 division by a (possibly negative) power of two using shifts
+// instead of idivq. |shift| and |negativeDivisor| encode Abs(d) == 2^shift
+// and the sign of d; the numerator register is reused as the output.
+void CodeGenerator::visitDivPowTwoI64(LDivPowTwoI64* ins) {
+ Register lhs = ToRegister(ins->numerator());
+
+ int32_t shift = ins->shift();
+ bool negativeDivisor = ins->negativeDivisor();
+ MDiv* mir = ins->mir();
+
+ // We use defineReuseInput so these should always be the same, which is
+ // convenient since all of our instructions here are two-address.
+ MOZ_ASSERT(lhs == ToRegister(ins->output()));
+
+ // Unsigned division is just a right-shift.
+ if (mir->isUnsigned()) {
+ if (shift != 0) {
+ masm.shrq(Imm32(shift), lhs);
+ }
+ return;
+ }
+
+ if (shift != 0) {
+ // Adjust the value so that shifting produces a correctly rounded result
+ // when the numerator is negative.
+ // See 10-1 "Signed Division by a Known Power of 2" in Henry S. Warren,
+ // Jr.'s Hacker's Delight.
+ if (mir->canBeNegativeDividend()) {
+ Register lhsCopy = ToRegister(ins->numeratorCopy());
+ MOZ_ASSERT(lhsCopy != lhs);
+ if (shift > 1) {
+ // Copy the sign bit of the numerator. (= (2^64 - 1) or 0)
+ masm.sarq(Imm32(63), lhs);
+ }
+ // Divide by 2^(64 - shift)
+ // i.e. (= (2^64 - 1) / 2^(64 - shift) or 0)
+ // i.e. (= (2^shift - 1) or 0)
+ masm.shrq(Imm32(64 - shift), lhs);
+ // If signed, make any 1 bit below the shifted bits bubble up, such
+ // that once shifted the value would be rounded towards 0.
+ masm.addq(lhsCopy, lhs);
+ }
+ masm.sarq(Imm32(shift), lhs);
+ }
+
+ if (negativeDivisor) {
+ masm.negq(lhs);
+ }
+
+ if (shift == 0 && negativeDivisor) {
+ // shift == 0 && negativeDivisor means the divisor is -1, and
+ // INT64_MIN / -1 overflows. The flags here are those of the negq above.
+ Label ok;
+ masm.j(Assembler::NoOverflow, &ok);
+ masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->trapSiteDesc());
+ masm.bind(&ok);
+ }
+}
+
+// Emit an int64 modulus by a power of two (Abs(d) == 2^shift) using masking
+// instead of idivq. For a signed, possibly negative dividend the result must
+// carry the sign of the dividend, hence the negate/mask/negate sequence.
+void CodeGenerator::visitModPowTwoI64(LModPowTwoI64* ins) {
+ Register64 lhs = Register64(ToRegister(ins->input()));
+ int32_t shift = ins->shift();
+ bool canBeNegative =
+ !ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend();
+
+ MOZ_ASSERT(lhs.reg == ToRegister(ins->output()));
+
+ if (shift == 0) {
+ // Modulus by 1 (or -1) is always zero. On x64, writing the 32-bit
+ // register zero-extends, so xorl clears the full 64-bit register.
+ masm.xorl(lhs.reg, lhs.reg);
+ return;
+ }
+
+ // Keep only the low |shift| bits, i.e. lhs &= (2^shift - 1). The 8/16/32
+ // cases use zero-extending moves, which avoid a 64-bit immediate mask.
+ auto clearHighBits = [&]() {
+ switch (shift) {
+ case 8:
+ masm.movzbl(lhs.reg, lhs.reg);
+ break;
+ case 16:
+ masm.movzwl(lhs.reg, lhs.reg);
+ break;
+ case 32:
+ masm.movl(lhs.reg, lhs.reg);
+ break;
+ default:
+ masm.and64(Imm64((uint64_t(1) << shift) - 1), lhs);
+ break;
+ }
+ };
+
+ Label negative;
+
+ if (canBeNegative) {
+ // Switch based on sign of the lhs.
+ // Positive numbers are just a bitmask
+ masm.branchTest64(Assembler::Signed, lhs, lhs, &negative);
+ }
+
+ clearHighBits();
+
+ if (canBeNegative) {
+ Label done;
+ masm.jump(&done);
+
+ // Negative numbers need a negate, bitmask, negate
+ masm.bind(&negative);
+
+ // Unlike in the visitModI64 case, we are not computing the mod by means of
+ // a division. Therefore, the divisor = -1 case isn't problematic (the andq
+ // always returns 0, which is what we expect).
+ //
+ // The negq instruction overflows if lhs == INT64_MIN, but this is also not
+ // a problem: shift is at most 63, and so the andq also always returns 0.
+ masm.neg64(lhs);
+ clearHighBits();
+ masm.neg64(lhs);
+
+ masm.bind(&done);
+ }
+}
+
+// Compute the truncated signed division lhs / d into rdx via reciprocal
+// multiplication (Hacker's Delight, chapter 10). Shared by LDivConstantI64
+// and LModConstantI64, which both fix their output/temp to the rax/rdx pair.
+template <class LDivOrMod>
+static void Divide64WithConstant(MacroAssembler& masm, LDivOrMod* ins) {
+ Register lhs = ToRegister(ins->numerator());
+ [[maybe_unused]] Register output = ToRegister(ins->output());
+ [[maybe_unused]] Register temp = ToRegister(ins->temp0());
+ int64_t d = ins->denominator();
+
+ MOZ_ASSERT(lhs != rax && lhs != rdx);
+ MOZ_ASSERT((output == rax && temp == rdx) || (output == rdx && temp == rax));
+
+ // The absolute value of the denominator isn't a power of 2 (see LDivPowTwoI64
+ // and LModPowTwoI64).
+ MOZ_ASSERT(!mozilla::IsPowerOfTwo(mozilla::Abs(d)));
+
+ auto* mir = ins->mir();
+
+ // We will first divide by Abs(d), and negate the answer if d is negative.
+ // If desired, this can be avoided by generalizing computeDivisionConstants.
+ // The constants must be computed from Abs(d): passing a negative d directly
+ // would wrap to a huge unsigned divisor and yield wrong quotients.
+ auto rmc = ReciprocalMulConstants::computeSignedDivisionConstants(mozilla::Abs(d));
+
+ // We first compute (M * n) >> 64, where M = rmc.multiplier.
+ masm.movq(ImmWord(uint64_t(rmc.multiplier)), rax);
+ masm.imulq(lhs);
+ if (rmc.multiplier > __int128_t(INT64_MAX)) {
+ MOZ_ASSERT(rmc.multiplier < (__int128_t(1) << 64));
+
+ // We actually computed rdx = ((int64_t(M) * n) >> 64) instead. Since
+ // (M * n) >> 64 is the same as (rdx + n), we can correct for the overflow.
+ // (rdx + n) can't overflow, as n and rdx have opposite signs because
+ // int64_t(M) is negative.
+ masm.addq(lhs, rdx);
+ }
+ // (M * n) >> (64 + shift) is the truncated division answer if n is
+ // non-negative, as proved in the comments of computeDivisionConstants. We
+ // must add 1 later if n is negative to get the right answer in all cases.
+ if (rmc.shiftAmount > 0) {
+ masm.sarq(Imm32(rmc.shiftAmount), rdx);
+ }
+
+ // We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be
+ // computed with just a sign-extending shift of 63 bits.
+ if (mir->canBeNegativeDividend()) {
+ masm.movq(lhs, rax);
+ masm.sarq(Imm32(63), rax);
+ masm.subq(rax, rdx);
+ }
+
+ // After this, rdx contains the correct truncated division result.
+ if (d < 0) {
+ masm.negq(rdx);
+ }
+}
+
+void CodeGenerator::visitDivConstantI64(LDivConstantI64* ins) {
+ // Use the full 64-bit denominator. Truncating to int32_t would make the
+ // divide-by-zero check below misfire for non-zero multiples of 2^32
+ // (e.g. 3 * 2^32), emitting an unconditional trap for a valid divisor.
+ int64_t d = ins->denominator();
+
+ // This emits the division answer into rdx.
+ MOZ_ASSERT(ToRegister(ins->output()) == rdx);
+ MOZ_ASSERT(ToRegister(ins->temp0()) == rax);
+
+ if (d == 0) {
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc());
+ return;
+ }
+
+ // Compute the truncated division result in rdx.
+ Divide64WithConstant(masm, ins);
+}
+
+// Emit int64 modulus by a non-power-of-two constant: compute the truncated
+// quotient with Divide64WithConstant, then derive the remainder as
+// lhs - d * quotient.
+void CodeGenerator::visitModConstantI64(LModConstantI64* ins) {
+ Register lhs = ToRegister(ins->numerator());
+ int64_t d = ins->denominator();
+
+ // This emits the modulus answer into rax.
+ MOZ_ASSERT(ToRegister(ins->output()) == rax);
+ MOZ_ASSERT(ToRegister(ins->temp0()) == rdx);
+
+ if (d == 0) {
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc());
+ return;
+ }
+
+ // Compute the truncated division result in rdx.
+ Divide64WithConstant(masm, ins);
+
+ // Compute the remainder in |rax|: rax = lhs - d * rdx
+ masm.mul64(Imm64(d), Register64(rdx));
+ masm.movq(lhs, rax);
+ masm.subq(rdx, rax);
+}
+
+// Compute the truncated unsigned division lhs / d into rdx via reciprocal
+// multiplication. Shared by LUDivConstantI64 and LUModConstantI64, which
+// both fix their output/temp to the rax/rdx pair.
+template <class LUDivOrUMod>
+static void UnsignedDivide64WithConstant(MacroAssembler& masm,
+ LUDivOrUMod* ins) {
+ Register lhs = ToRegister(ins->numerator());
+ [[maybe_unused]] Register output = ToRegister(ins->output());
+ [[maybe_unused]] Register temp = ToRegister(ins->temp0());
+ uint64_t d = ins->denominator();
+
+ MOZ_ASSERT(lhs != rax && lhs != rdx);
+
+ // The denominator isn't a power of 2 (see LDivPowTwoI64 and LModPowTwoI64).
+ MOZ_ASSERT(!mozilla::IsPowerOfTwo(d));
+
+ auto rmc = ReciprocalMulConstants::computeUnsignedDivisionConstants(d);
+
+ // We first compute (M * n) >> 64, where M = rmc.multiplier.
+ masm.movq(ImmWord(uint64_t(rmc.multiplier)), rax);
+ masm.umulq(lhs);
+ if (rmc.multiplier > __int128_t(UINT64_MAX)) {
+ // M >= 2^64 and shift == 0 is impossible, as d >= 2 implies that
+ // ((M * n) >> (64 + shift)) >= n > floor(n/d) whenever n >= d,
+ // contradicting the proof of correctness in computeDivisionConstants.
+ MOZ_ASSERT(rmc.shiftAmount > 0);
+ MOZ_ASSERT(rmc.multiplier < (__int128_t(1) << 65));
+
+ // We actually computed rdx = ((uint64_t(M) * n) >> 64) instead. Since
+ // (M * n) >> (64 + shift) is the same as (rdx + n) >> shift, we can correct
+ // for the overflow. This case is a bit trickier than the signed case,
+ // though, as the (rdx + n) addition itself can overflow; however, note that
+ // (rdx + n) >> shift == (((n - rdx) >> 1) + rdx) >> (shift - 1),
+ // which is overflow-free. See Hacker's Delight, section 10-8 for details.
+
+ // Compute (n - rdx) >> 1 into temp.
+ masm.movq(lhs, rax);
+ masm.subq(rdx, rax);
+ masm.shrq(Imm32(1), rax);
+
+ // Finish the computation.
+ masm.addq(rax, rdx);
+ masm.shrq(Imm32(rmc.shiftAmount - 1), rdx);
+ } else {
+ if (rmc.shiftAmount > 0) {
+ masm.shrq(Imm32(rmc.shiftAmount), rdx);
+ }
+ }
+}
+
+// Emit uint64 division by a non-power-of-two constant; the quotient is
+// produced in rdx by UnsignedDivide64WithConstant. A zero denominator traps.
+void CodeGenerator::visitUDivConstantI64(LUDivConstantI64* ins) {
+ uint64_t d = ins->denominator();
+
+ // This emits the division answer into rdx.
+ MOZ_ASSERT(ToRegister(ins->output()) == rdx);
+ MOZ_ASSERT(ToRegister(ins->temp0()) == rax);
+
+ if (d == 0) {
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc());
+ return;
+ }
+
+ // Compute the truncated division result in rdx.
+ UnsignedDivide64WithConstant(masm, ins);
+}
+
+// Emit uint64 modulus by a non-power-of-two constant: compute the truncated
+// quotient with UnsignedDivide64WithConstant, then derive the remainder as
+// lhs - d * quotient.
+void CodeGenerator::visitUModConstantI64(LUModConstantI64* ins) {
+ Register lhs = ToRegister(ins->numerator());
+ uint64_t d = ins->denominator();
+
+ // This emits the modulus answer into rax.
+ MOZ_ASSERT(ToRegister(ins->output()) == rax);
+ MOZ_ASSERT(ToRegister(ins->temp0()) == rdx);
+
+ if (d == 0) {
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc());
+ return;
+ }
+
+ // Compute the truncated division result in rdx.
+ UnsignedDivide64WithConstant(masm, ins);
+
+ // Compute the remainder in |rax|: rax = lhs - d * rdx
+ masm.mul64(Imm64(d), Register64(rdx));
+ masm.movq(lhs, rax);
+ masm.subq(rdx, rax);
+}
+
void CodeGeneratorX64::emitBigIntPtrDiv(LBigIntPtrDiv* ins, Register dividend,
Register divisor, Register output) {
// Callers handle division by zero and integer overflow.
diff --git a/js/src/jit/x64/Lowering-x64.cpp b/js/src/jit/x64/Lowering-x64.cpp
@@ -7,6 +7,7 @@
#include "jit/x64/Lowering-x64.h"
#include "mozilla/CheckedInt.h"
+#include "mozilla/MathAlgorithms.h"
#include "jit/Lowering.h"
#include "jit/MIR-wasm.h"
@@ -526,35 +527,120 @@ void LIRGenerator::visitSubstr(MSubstr* ins) {
}
void LIRGeneratorX64::lowerDivI64(MDiv* div) {
+ // Division instructions are slow. Division by constant denominators can be
+ // rewritten to use other instructions.
+ if (div->rhs()->isConstant()) {
+ int64_t rhs = div->rhs()->toConstant()->toInt64();
+
+ // Division by powers of two can be done by shifting, and division by
+ // other numbers can be done by a reciprocal multiplication technique.
+ // Note: mozilla::Abs returns an unsigned value, so rhs == INT64_MIN
+ // (Abs(rhs) == 2^63) also takes the power-of-two path.
+ if (mozilla::IsPowerOfTwo(mozilla::Abs(rhs))) {
+ int32_t shift = mozilla::FloorLog2(mozilla::Abs(rhs));
+ LAllocation lhs = useRegisterAtStart(div->lhs());
+
+ // We have to round the result toward 0 when the remainder is non-zero.
+ // This requires an extra register to round up/down when the left-hand
+ // side is signed.
+ LAllocation lhsCopy = div->canBeNegativeDividend()
+ ? useRegister(div->lhs())
+ : LAllocation();
+
+ auto* lir = new (alloc()) LDivPowTwoI64(lhs, lhsCopy, shift, rhs < 0);
+ defineReuseInput(lir, div, 0);
+ return;
+ }
+
+ // Other constants (including 0, which traps) use reciprocal
+ // multiplication; the LIR node fixes rax/rdx itself.
+ auto* lir = new (alloc())
+ LDivConstantI64(useRegister(div->lhs()), tempFixed(rax), rhs);
+ defineFixed(lir, div, LAllocation(AnyRegister(rdx)));
+ return;
+ }
+
auto* lir = new (alloc()) LDivI64(useFixedAtStart(div->lhs(), rax),
useRegister(div->rhs()), tempFixed(rdx));
- defineInt64Fixed(lir, div, LInt64Allocation(LAllocation(AnyRegister(rax))));
-}
-
-void LIRGeneratorX64::lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div) {
- MOZ_CRASH("We don't use runtime div for this architecture");
+ defineFixed(lir, div, LAllocation(AnyRegister(rax)));
}
void LIRGeneratorX64::lowerModI64(MMod* mod) {
+ // As in lowerDivI64, modulus by a constant avoids the slow idivq.
+ if (mod->rhs()->isConstant()) {
+ int64_t rhs = mod->rhs()->toConstant()->toInt64();
+
+ // Power-of-two magnitudes reduce to masking (see visitModPowTwoI64).
+ if (mozilla::IsPowerOfTwo(mozilla::Abs(rhs))) {
+ int32_t shift = mozilla::FloorLog2(mozilla::Abs(rhs));
+
+ auto* lir =
+ new (alloc()) LModPowTwoI64(useRegisterAtStart(mod->lhs()), shift);
+ defineReuseInput(lir, mod, 0);
+ return;
+ }
+
+ // Other constants (including 0, which traps) use reciprocal
+ // multiplication; the LIR node fixes rax/rdx itself.
+ auto* lir = new (alloc())
+ LModConstantI64(useRegister(mod->lhs()), tempFixed(rdx), rhs);
+ defineFixed(lir, mod, LAllocation(AnyRegister(rax)));
+ return;
+ }
+
auto* lir = new (alloc()) LModI64(useFixedAtStart(mod->lhs(), rax),
useRegister(mod->rhs()), tempFixed(rax));
- defineInt64Fixed(lir, mod, LInt64Allocation(LAllocation(AnyRegister(rdx))));
-}
-
-void LIRGeneratorX64::lowerWasmBuiltinModI64(MWasmBuiltinModI64* mod) {
- MOZ_CRASH("We don't use runtime mod for this architecture");
+ defineFixed(lir, mod, LAllocation(AnyRegister(rdx)));
}
void LIRGeneratorX64::lowerUDivI64(MDiv* div) {
+ // Unsigned division by a constant avoids the slow hardware divide.
+ if (div->rhs()->isConstant()) {
+ // NOTE: the result of toInt64 is coerced to uint64_t, preserving the
+ // bit pattern of the constant.
+ uint64_t rhs = div->rhs()->toConstant()->toInt64();
+
+ // Unsigned division by 2^shift is a plain logical right-shift, so no
+ // numerator copy and no negative-divisor handling are needed.
+ if (mozilla::IsPowerOfTwo(rhs)) {
+ int32_t shift = mozilla::FloorLog2(rhs);
+
+ auto* lir = new (alloc()) LDivPowTwoI64(useRegisterAtStart(div->lhs()),
+ LAllocation(), shift, false);
+ defineReuseInput(lir, div, 0);
+ return;
+ }
+
+ auto* lir = new (alloc())
+ LUDivConstantI64(useRegister(div->lhs()), tempFixed(rax), rhs);
+ defineFixed(lir, div, LAllocation(AnyRegister(rdx)));
+ return;
+ }
+
auto* lir = new (alloc()) LUDivI64(useFixedAtStart(div->lhs(), rax),
useRegister(div->rhs()), tempFixed(rdx));
- defineInt64Fixed(lir, div, LInt64Allocation(LAllocation(AnyRegister(rax))));
+ defineFixed(lir, div, LAllocation(AnyRegister(rax)));
}
void LIRGeneratorX64::lowerUModI64(MMod* mod) {
+ // As in lowerUDivI64, unsigned modulus by a constant avoids the divide.
+ if (mod->rhs()->isConstant()) {
+ // NOTE: the result of toInt64 is coerced to uint64_t, preserving the
+ // bit pattern of the constant.
+ uint64_t rhs = mod->rhs()->toConstant()->toInt64();
+
+ // Unsigned modulus by 2^shift is a plain mask of the low bits.
+ if (mozilla::IsPowerOfTwo(rhs)) {
+ int32_t shift = mozilla::FloorLog2(rhs);
+
+ auto* lir =
+ new (alloc()) LModPowTwoI64(useRegisterAtStart(mod->lhs()), shift);
+ defineReuseInput(lir, mod, 0);
+ return;
+ }
+
+ auto* lir = new (alloc())
+ LUModConstantI64(useRegister(mod->lhs()), tempFixed(rdx), rhs);
+ defineFixed(lir, mod, LAllocation(AnyRegister(rax)));
+ return;
+ }
+
auto* lir = new (alloc()) LUModI64(useFixedAtStart(mod->lhs(), rax),
useRegister(mod->rhs()), tempFixed(rax));
- defineInt64Fixed(lir, mod, LInt64Allocation(LAllocation(AnyRegister(rdx))));
+ defineFixed(lir, mod, LAllocation(AnyRegister(rdx)));
}
+
+// x64 lowers all int64 div/mod to inline code above, so the runtime-call
+// fallbacks must never be reached on this architecture.
+void LIRGeneratorX64::lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div) {
+ MOZ_CRASH("We don't use runtime div for this architecture");
+}
+
+void LIRGeneratorX64::lowerWasmBuiltinModI64(MWasmBuiltinModI64* mod) {
+ MOZ_CRASH("We don't use runtime mod for this architecture");
+}
void LIRGeneratorX64::lowerBigIntPtrDiv(MBigIntPtrDiv* ins) {