commit 646effac1b0c6a423966751b0f086a0b154c20a4
parent 84f5d2f54b355cf304b9cf93a876abd4b74310b7
Author: André Bargull <andre.bargull@gmail.com>
Date: Sat, 15 Nov 2025 10:15:24 +0000
Bug 1998457 - Part 4: Optimise int64 division and modulus with constants in arm64. r=spidermonkey-reviewers,iain
Differential Revision: https://phabricator.services.mozilla.com/D271439
Diffstat:
3 files changed, 397 insertions(+), 14 deletions(-)
diff --git a/js/src/jit/LIROps.yaml b/js/src/jit/LIROps.yaml
@@ -4520,28 +4520,28 @@
#ifdef JS_CODEGEN_ARM64
- name: DivI64
- result_type: Int64
+ result_type: WordSized
operands:
lhs: WordSized
rhs: WordSized
mir_op: Div
- name: ModI64
- result_type: Int64
+ result_type: WordSized
operands:
lhs: WordSized
rhs: WordSized
mir_op: Mod
- name: UDivI64
- result_type: Int64
+ result_type: WordSized
operands:
lhs: WordSized
rhs: WordSized
mir_op: Div
- name: UModI64
- result_type: Int64
+ result_type: WordSized
operands:
lhs: WordSized
rhs: WordSized
@@ -4579,6 +4579,55 @@
denominator: uint32_t
mir_op: Mod
+- name: DivPowTwoI64
+ result_type: WordSized
+ operands:
+ numerator: WordSized
+ arguments:
+ shift: int32_t
+ negativeDivisor: bool
+ mir_op: Div
+
+- name: ModPowTwoI64
+ result_type: WordSized
+ operands:
+ input: WordSized
+ arguments:
+ shift: int32_t
+ mir_op: Mod
+
+- name: DivConstantI64
+ result_type: WordSized
+ operands:
+ numerator: WordSized
+ arguments:
+ denominator: int64_t
+ mir_op: Div
+
+- name: ModConstantI64
+ result_type: WordSized
+ operands:
+ numerator: WordSized
+ arguments:
+ denominator: int64_t
+ mir_op: Mod
+
+- name: UDivConstantI64
+ result_type: WordSized
+ operands:
+ numerator: WordSized
+ arguments:
+ denominator: uint64_t
+ mir_op: Div
+
+- name: UModConstantI64
+ result_type: WordSized
+ operands:
+ numerator: WordSized
+ arguments:
+ denominator: uint64_t
+ mir_op: Mod
+
- name: UDiv
result_type: WordSized
operands:
diff --git a/js/src/jit/arm64/CodeGenerator-arm64.cpp b/js/src/jit/arm64/CodeGenerator-arm64.cpp
@@ -506,6 +506,61 @@ void CodeGenerator::visitDivPowTwoI(LDivPowTwoI* ins) {
}
}
+void CodeGenerator::visitDivPowTwoI64(LDivPowTwoI64* ins) {
+ ARMRegister numerator64 = toXRegister(ins->numerator());
+ ARMRegister output64 = toXRegister(ins->output());
+
+ int32_t shift = ins->shift();
+ bool negativeDivisor = ins->negativeDivisor();
+ MDiv* mir = ins->mir();
+
+ if (shift) {
+ if (mir->isUnsigned()) {
+ // shift right
+ masm.Lsr(output64, numerator64, shift);
+ } else {
+ ARMRegister temp64 = numerator64;
+ // Adjust the value so that shifting produces a correctly
+ // rounded result when the numerator is negative. See 10-1
+ // "Signed Division by a Known Power of 2" in Henry
+ // S. Warren, Jr.'s Hacker's Delight.
+ if (mir->canBeNegativeDividend()) {
+ if (shift > 1) {
+ // Copy the sign bit of the numerator. (= (2^64 - 1) or 0)
+ masm.Asr(output64, numerator64, 63);
+ temp64 = output64;
+ }
+ // Divide by 2^(64 - shift)
+ // i.e. (= (2^64 - 1) / 2^(64 - shift) or 0)
+ // i.e. (= (2^shift - 1) or 0)
+ masm.Lsr(output64, temp64, 64 - shift);
+ // If signed, make any 1 bit below the shifted bits to bubble up, such
+ // that once shifted the value would be rounded towards 0.
+ masm.Add(output64, output64, numerator64);
+ temp64 = output64;
+ }
+ masm.Asr(output64, temp64, shift);
+
+ if (negativeDivisor) {
+ masm.Neg(output64, output64);
+ }
+ }
+ return;
+ }
+
+ if (negativeDivisor) {
+ // INT64_MIN / -1 overflows.
+ Label ok;
+ masm.Negs(output64, numerator64);
+ masm.branch(Assembler::NoOverflow, &ok);
+ masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->trapSiteDesc());
+ masm.bind(&ok);
+ } else {
+ // Copy the result.
+ masm.Mov(output64, numerator64);
+ }
+}
+
template <class LDivOrMod>
static void DivideWithConstant(MacroAssembler& masm, LDivOrMod* ins) {
ARMRegister lhs32 = toWRegister(ins->numerator());
@@ -553,8 +608,7 @@ static void DivideWithConstant(MacroAssembler& masm, LDivOrMod* ins) {
// We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be
// computed with just a sign-extending shift of 31 bits.
if (mir->canBeNegativeDividend()) {
- masm.Asr(const32, lhs32, 31);
- masm.Sub(output32, output32, const32);
+ masm.Sub(output32, output32, Operand(lhs32, vixl::ASR, 31));
}
// After this, output32 contains the correct truncated division result.
@@ -700,6 +754,119 @@ void CodeGenerator::visitUDivConstant(LUDivConstant* ins) {
}
}
+template <class LDivOrMod>
+static void Divide64WithConstant(MacroAssembler& masm, LDivOrMod* ins) {
+ ARMRegister lhs64 = toXRegister(ins->numerator());
+ ARMRegister output64 = toXRegister(ins->output());
+ int64_t d = ins->denominator();
+
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ ARMRegister const64 = temps.AcquireX();
+
+ // The absolute value of the denominator isn't a power of 2.
+ MOZ_ASSERT(!mozilla::IsPowerOfTwo(mozilla::Abs(d)));
+
+ auto* mir = ins->mir();
+
+ // We will first divide by Abs(d), and negate the answer if d is negative.
+ // If desired, this can be avoided by generalizing computeDivisionConstants.
+ auto rmc = ReciprocalMulConstants::computeSignedDivisionConstants(d);
+
+ // We first compute (M * n) >> 64, where M = rmc.multiplier.
+ masm.Mov(const64, uint64_t(rmc.multiplier));
+ masm.Smulh(output64, lhs64, const64);
+ if (rmc.multiplier > Int128(INT64_MAX)) {
+ MOZ_ASSERT(rmc.multiplier < (Int128(1) << 64));
+
+ // We actually computed output = ((int64_t(M) * n) >> 64) instead. Since
+ // (M * n) >> 64 is the same as (output + n), we can correct for the
+ // overflow. (output + n) can't overflow, as n and output have opposite
+ // signs because int64_t(M) is negative.
+ masm.Add(output64, output64, lhs64);
+ }
+
+ // (M * n) >> (64 + shift) is the truncated division answer if n is
+ // non-negative, as proved in the comments of computeDivisionConstants. We
+ // must add 1 later if n is negative to get the right answer in all cases.
+ masm.Asr(output64, output64, rmc.shiftAmount);
+
+ // We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be
+ // computed with just a sign-extending shift of 63 bits.
+ if (mir->canBeNegativeDividend()) {
+ masm.Sub(output64, output64, Operand(lhs64, vixl::ASR, 63));
+ }
+
+ // After this, output64 contains the correct truncated division result.
+ if (d < 0) {
+ masm.Neg(output64, output64);
+ }
+}
+
+void CodeGenerator::visitDivConstantI64(LDivConstantI64* ins) {
+ int64_t d = ins->denominator();
+
+ if (d == 0) {
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc());
+ return;
+ }
+
+ // Compute the truncated division result.
+ Divide64WithConstant(masm, ins);
+}
+
+template <class LUDivOrUMod>
+static void UnsignedDivide64WithConstant(MacroAssembler& masm,
+ LUDivOrUMod* ins) {
+ ARMRegister lhs64 = toXRegister(ins->numerator());
+ ARMRegister output64 = toXRegister(ins->output());
+ uint64_t d = ins->denominator();
+
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ ARMRegister const64 = temps.AcquireX();
+
+ // The denominator isn't a power of 2 (see LDivPowTwoI).
+ MOZ_ASSERT(!mozilla::IsPowerOfTwo(d));
+
+ auto rmc = ReciprocalMulConstants::computeUnsignedDivisionConstants(d);
+
+ // We first compute (M * n) >> 64, where M = rmc.multiplier.
+ masm.Mov(const64, uint64_t(rmc.multiplier));
+ masm.Umulh(output64, lhs64, const64);
+ if (rmc.multiplier > Int128(UINT64_MAX)) {
+ // M >= 2^64 and shift == 0 is impossible, as d >= 2 implies that
+ // ((M * n) >> (64 + shift)) >= n > floor(n/d) whenever n >= d,
+ // contradicting the proof of correctness in computeDivisionConstants.
+ MOZ_ASSERT(rmc.shiftAmount > 0);
+ MOZ_ASSERT(rmc.multiplier < (Int128(1) << 65));
+
+ // We actually computed output = ((uint64_t(M) * n) >> 64) instead. Since
+ // (M * n) >> (64 + shift) is the same as (output + n) >> shift, we can
+ // correct for the overflow. This case is a bit trickier than the signed
+ // case, though, as the (output + n) addition itself can overflow; however,
+ // note that
+ // (output + n) >> shift == (((n - output) >> 1) + output) >> (shift - 1),
+ // which is overflow-free. See Hacker's Delight, section 10-8 for details.
+
+ masm.Sub(const64, lhs64, output64);
+ masm.Add(output64, output64, Operand(const64, vixl::LSR, 1));
+ masm.Lsr(output64, output64, rmc.shiftAmount - 1);
+ } else {
+ masm.Lsr(output64, output64, rmc.shiftAmount);
+ }
+}
+
+void CodeGenerator::visitUDivConstantI64(LUDivConstantI64* ins) {
+ uint64_t d = ins->denominator();
+
+ if (d == 0) {
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc());
+ return;
+ }
+
+ // Compute the truncated division result.
+ UnsignedDivide64WithConstant(masm, ins);
+}
+
void CodeGenerator::visitModI(LModI* ins) {
Register lhs = ToRegister(ins->lhs());
Register rhs = ToRegister(ins->rhs());
@@ -787,6 +954,54 @@ void CodeGenerator::visitModPowTwoI(LModPowTwoI* ins) {
}
}
+void CodeGenerator::visitModPowTwoI64(LModPowTwoI64* ins) {
+ Register lhs = ToRegister(ins->input());
+ ARMRegister lhs64 = toXRegister(ins->input());
+ ARMRegister out64 = toXRegister(ins->output());
+
+ int32_t shift = ins->shift();
+ bool canBeNegative =
+ !ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend();
+
+ if (shift == 0) {
+ masm.Mov(out64, xzr);
+ return;
+ }
+
+ auto clearHighBits = [&](ARMRegister reg) {
+ switch (shift) {
+ case 32:
+ masm.Mov(out64.W(), reg.W());
+ break;
+ default:
+ masm.And(out64, reg, Operand((uint64_t(1) << shift) - 1));
+ break;
+ }
+ };
+
+ Label negative;
+ if (canBeNegative) {
+ // Switch based on sign of the lhs.
+ // Positive numbers are just a bitmask.
+ masm.branchTestPtr(Assembler::Signed, lhs, lhs, &negative);
+ }
+
+ clearHighBits(lhs64);
+
+ if (canBeNegative) {
+ Label done;
+ masm.jump(&done);
+
+ // Negative numbers need a negate, bitmask, negate.
+ masm.bind(&negative);
+ masm.Neg(out64, Operand(lhs64));
+ clearHighBits(out64);
+ masm.Neg(out64, Operand(out64));
+
+ masm.bind(&done);
+ }
+}
+
void CodeGenerator::visitModConstantI(LModConstantI* ins) {
Register lhs = ToRegister(ins->numerator());
ARMRegister lhs32 = toWRegister(ins->numerator());
@@ -866,6 +1081,54 @@ void CodeGenerator::visitUModConstant(LUModConstant* ins) {
}
}
+void CodeGenerator::visitModConstantI64(LModConstantI64* ins) {
+ ARMRegister lhs64 = toXRegister(ins->numerator());
+ ARMRegister output64 = toXRegister(ins->output());
+
+ int64_t d = ins->denominator();
+
+ if (d == 0) {
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc());
+ return;
+ }
+
+ // Compute the truncated division result in output64.
+ Divide64WithConstant(masm, ins);
+
+ // Compute the remainder: output = lhs - (output * rhs).
+ {
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ ARMRegister rhs64 = temps.AcquireX();
+
+ masm.Mov(rhs64, d);
+ masm.Msub(output64, output64, rhs64, lhs64);
+ }
+}
+
+void CodeGenerator::visitUModConstantI64(LUModConstantI64* ins) {
+ ARMRegister lhs64 = toXRegister(ins->numerator());
+ ARMRegister output64 = toXRegister(ins->output());
+
+ uint64_t d = ins->denominator();
+
+ if (d == 0) {
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->mir()->trapSiteDesc());
+ return;
+ }
+
+ // Compute the truncated division result in output64.
+ UnsignedDivide64WithConstant(masm, ins);
+
+ // Compute the remainder: output = lhs - (output * rhs).
+ {
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ ARMRegister rhs64 = temps.AcquireX();
+
+ masm.Mov(rhs64, d);
+ masm.Msub(output64, output64, rhs64, lhs64);
+ }
+}
+
void CodeGeneratorARM64::emitBigIntPtrDiv(LBigIntPtrDiv* ins, Register dividend,
Register divisor, Register output) {
// Callers handle division by zero and integer overflow.
diff --git a/js/src/jit/arm64/Lowering-arm64.cpp b/js/src/jit/arm64/Lowering-arm64.cpp
@@ -294,27 +294,98 @@ void LIRGeneratorARM64::lowerModI(MMod* mod) {
}
void LIRGeneratorARM64::lowerDivI64(MDiv* div) {
+ if (div->rhs()->isConstant()) {
+ LAllocation lhs = useRegister(div->lhs());
+ int64_t rhs = div->rhs()->toConstant()->toInt64();
+
+ if (mozilla::IsPowerOfTwo(mozilla::Abs(rhs))) {
+ int32_t shift = mozilla::FloorLog2(mozilla::Abs(rhs));
+
+ auto* lir = new (alloc()) LDivPowTwoI64(lhs, shift, rhs < 0);
+ define(lir, div);
+ return;
+ }
+
+ auto* lir = new (alloc()) LDivConstantI64(lhs, rhs);
+ define(lir, div);
+ return;
+ }
+
auto* lir = new (alloc())
LDivI64(useRegisterAtStart(div->lhs()), useRegisterAtStart(div->rhs()));
- defineInt64(lir, div);
+ define(lir, div);
}
void LIRGeneratorARM64::lowerModI64(MMod* mod) {
- auto* lir =
- new (alloc()) LModI64(useRegister(mod->lhs()), useRegister(mod->rhs()));
- defineInt64(lir, mod);
+ LAllocation lhs = useRegister(mod->lhs());
+
+ if (mod->rhs()->isConstant()) {
+ int64_t rhs = mod->rhs()->toConstant()->toInt64();
+
+ if (mozilla::IsPowerOfTwo(mozilla::Abs(rhs))) {
+ int32_t shift = mozilla::FloorLog2(mozilla::Abs(rhs));
+
+ auto* lir = new (alloc()) LModPowTwoI64(lhs, shift);
+ define(lir, mod);
+ return;
+ }
+
+ auto* lir = new (alloc()) LModConstantI64(lhs, rhs);
+ define(lir, mod);
+ return;
+ }
+
+ auto* lir = new (alloc()) LModI64(lhs, useRegister(mod->rhs()));
+ define(lir, mod);
}
void LIRGeneratorARM64::lowerUDivI64(MDiv* div) {
+ if (div->rhs()->isConstant()) {
+ LAllocation lhs = useRegister(div->lhs());
+
+ // NOTE: the result of toInt64 is coerced to uint64_t.
+ uint64_t rhs = div->rhs()->toConstant()->toInt64();
+
+ if (mozilla::IsPowerOfTwo(rhs)) {
+ int32_t shift = mozilla::FloorLog2(rhs);
+
+ auto* lir = new (alloc()) LDivPowTwoI64(lhs, shift, false);
+ define(lir, div);
+ return;
+ }
+
+ auto* lir = new (alloc()) LUDivConstantI64(lhs, rhs);
+ define(lir, div);
+ return;
+ }
+
auto* lir = new (alloc())
LUDivI64(useRegisterAtStart(div->lhs()), useRegisterAtStart(div->rhs()));
- defineInt64(lir, div);
+ define(lir, div);
}
void LIRGeneratorARM64::lowerUModI64(MMod* mod) {
- auto* lir =
- new (alloc()) LUModI64(useRegister(mod->lhs()), useRegister(mod->rhs()));
- defineInt64(lir, mod);
+ LAllocation lhs = useRegister(mod->lhs());
+
+ if (mod->rhs()->isConstant()) {
+ // NOTE: the result of toInt64 is coerced to uint64_t.
+ uint64_t rhs = mod->rhs()->toConstant()->toInt64();
+
+ if (mozilla::IsPowerOfTwo(rhs)) {
+ int32_t shift = mozilla::FloorLog2(rhs);
+
+ auto* lir = new (alloc()) LModPowTwoI64(lhs, shift);
+ define(lir, mod);
+ return;
+ }
+
+ auto* lir = new (alloc()) LUModConstantI64(lhs, rhs);
+ define(lir, mod);
+ return;
+ }
+
+ auto* lir = new (alloc()) LUModI64(lhs, useRegister(mod->rhs()));
+ define(lir, mod);
}
void LIRGeneratorARM64::lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div) {