[ tor-browser ].git.dasho

commit 130494460295b72c62f2d7c6763cb00c89189a85
parent 979b5c3d12aa2503aabdff77ec4c8f71ed3b9d37
Author: André Bargull <andre.bargull@gmail.com>
Date:   Mon, 27 Oct 2025 09:00:01 +0000

Bug 1996345 - Part 4: Use three operand imul instruction for Int64 when possible. r=spidermonkey-reviewers,iain

Applies the changes from part 1 for Int64 multiplication.

Differential Revision: https://phabricator.services.mozilla.com/D270024

Diffstat:
M js/src/jit-test/tests/wasm/binop-x64-ion-codegen.js  | 50 +++++++++++++++++++++++++++++++++++++++++++-------
M js/src/jit-test/tests/wasm/ion-adhoc-multiplatform.js  | 43 +++++++++++++++----------------------------
M js/src/jit/LIROps.yaml  | 2 +-
M js/src/jit/x64/CodeGenerator-x64.cpp  | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
M js/src/jit/x64/Lowering-x64.cpp  | 14 ++++++++++++--

5 files changed, 129 insertions(+), 60 deletions(-)
diff --git a/js/src/jit-test/tests/wasm/binop-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/binop-x64-ion-codegen.js
@@ -47,7 +47,7 @@ let zero64 = `(module
 codegenTestX64_adhoc(
     zero64,
     'f',
-    'xor %rax, %rax', {no_prefix:true});
+    'xor %rax, %rax');
 assertEq(wasmEvalText(zero64).exports.f(-37000000000n), 0n)
 assertEq(wasmEvalText(zero64).exports.f(42000000000n), 0n)
 
@@ -74,7 +74,7 @@ codegenTestX64_adhoc(
 assertEq(wasmEvalText(one64).exports.f(-37000000000n), -37000000000n)
 assertEq(wasmEvalText(one64).exports.f(42000000000n), 42000000000n)
 
-// Test that multiplication by two yields lea/add
+// Test that multiplication by two yields lea
 
 let double32 =
     `(module
@@ -93,11 +93,11 @@ let double64 = `(module
 codegenTestX64_adhoc(
     double64,
     'f',
-    'add %rax, %rax', {no_prefix:true});
+    'lea \\(%rdi,%rdi,1\\), %rax');
 assertEq(wasmEvalText(double64).exports.f(-37000000000n), -74000000000n)
 assertEq(wasmEvalText(double64).exports.f(42000000000n), 84000000000n)
 
-// Test that multiplication by four yields lea/shift
+// Test that multiplication by four yields lea
 
 let quad32 =
     `(module
@@ -116,11 +116,11 @@ let quad64 = `(module
 codegenTestX64_adhoc(
     quad64,
     'f',
-    'shl \\$0x02, %rax', {no_prefix:true});
+    'lea \\(,%rdi,4\\), %rax');
 assertEq(wasmEvalText(quad64).exports.f(-37000000000n), -148000000000n)
 assertEq(wasmEvalText(quad64).exports.f(42000000000n), 168000000000n)
 
-// Test that multiplication by five yields lea/imul
+// Test that multiplication by five yields lea
 
 let quint32 =
     `(module
@@ -139,10 +139,46 @@ let quint64 = `(module
 codegenTestX64_adhoc(
     quint64,
     'f',
-    `imul \\$0x05, %rax, %rax`, {no_prefix:true})
+    `lea \\(%rdi,%rdi,4\\), %rax`)
 assertEq(wasmEvalText(quint64).exports.f(-37000000000n), -37000000000n*5n)
 assertEq(wasmEvalText(quint64).exports.f(42000000000n), 42000000000n*5n)
 
+// Test that multiplication by six yields imul
+
+let sext32 =
+    `(module
+       (func (export "f") (param i32) (result i32)
+         (i32.mul (local.get 0) (i32.const 6))))`;
+codegenTestX64_adhoc(
+    sext32,
+    'f',
+    'imul \\$0x06, %edi, %eax');
+assertEq(wasmEvalText(sext32).exports.f(-37), -37*6)
+assertEq(wasmEvalText(sext32).exports.f(42), 42*6)
+
+let sext64 = `(module
+       (func (export "f") (param i64) (result i64)
+         (i64.mul (local.get 0) (i64.const 6))))`
+codegenTestX64_adhoc(
+    sext64,
+    'f',
+    `imul \\$0x06, %rdi, %rax`)
+assertEq(wasmEvalText(sext64).exports.f(-37000000000n), -37000000000n*6n)
+assertEq(wasmEvalText(sext64).exports.f(42000000000n), 42000000000n*6n)
+
+// Test that multiplication by UINT32_MAX yields imul
+
+let uint32max64 = `(module
+       (func (export "f") (param i64) (result i64)
+         (i64.mul (local.get 0) (i64.const 0xffffffff))))`
+codegenTestX64_adhoc(
+    uint32max64,
+    'f',
+    `mov \\$-0x01, %r11d
+     imul %r11, %rax`, {no_prefix:true})
+assertEq(wasmEvalText(uint32max64).exports.f(-37000000000n), BigInt.asIntN(64, -37000000000n*0xffffffffn))
+assertEq(wasmEvalText(uint32max64).exports.f(42000000000n), BigInt.asIntN(64, 42000000000n*0xffffffffn))
+
 // Test that 0-n yields negation.
 
 let subneg32 =
diff --git a/js/src/jit-test/tests/wasm/ion-adhoc-multiplatform.js b/js/src/jit-test/tests/wasm/ion-adhoc-multiplatform.js
@@ -40,18 +40,8 @@ codegenTestMultiplatform_adhoc(
     `(module (func (export "mul64_zeroL") (param $p1 i64) (result i64)
        (i64.mul (i64.const 0) (local.get $p1))))`,
     "mul64_zeroL",
-    // FIXME folding happened, zero-creation insns could be improved
-    {x64:   // First we move rdi to rax unnecessarily via rcx (bug 1752520),
-            // then we overwrite rax.  Presumably because the folding
-            // 0 * x => 0 is done at the LIR level, not the MIR level, hence
-            // the now-pointless WasmParameter node is not DCE'd away, since
-            // DCE only happens at the MIR level.  In fact all targets suffer
-            // from the latter problem, but on x86 no_prefix_x86:true
-            // hides it, and on arm32/64 the pointless move is correctly
-            // transformed by RA into a no-op.  Also, on xor, REX.W is redundant.
-            `mov %rdi, %rcx
-             mov %rcx, %rax
-             xor %rax, %rax`,
+    // FIXME zero-creation insns could be improved
+    {x64:   `xor %rax, %rax`,     // REX.W is redundant
      x86:   `xor %eax, %eax
              xor %edx, %edx`,
      arm64: `mov x0, xzr`,
@@ -64,7 +54,14 @@ codegenTestMultiplatform_adhoc(
     `(module (func (export "mul32_oneL") (param $p1 i32) (result i32)
        (i32.mul (i32.const 1) (local.get $p1))))`,
     "mul32_oneL",
-    {x64:   `mov %edi, %ecx
+    {x64:   // We move edi to eax unnecessarily via ecx (bug 1752520).
+            // Presumably because the folding 1 * x => x is done at the LIR
+            // level, not the MIR level, hence the now-pointless WasmParameter
+            // node is not DCE'd away, since DCE only happens at the MIR level.
+            // In fact all targets suffer from the latter problem, but on x86
+            // no_prefix_x86:true hides it, and on arm32/64 the pointless move
+            // is correctly transformed by RA into a no-op.
+            `mov %edi, %ecx
              mov %ecx, %eax`,
      x86:   `movl 0x10\\(%rbp\\), %eax`,
      arm64: ``,
@@ -125,9 +122,7 @@ codegenTestMultiplatform_adhoc(
     `(module (func (export "mul64_twoL") (param $p1 i64) (result i64)
        (i64.mul (i64.const 2) (local.get $p1))))`,
     "mul64_twoL",
-    {x64:   `mov %rdi, %rcx
-             mov %rcx, %rax
-             add %rax, %rax`,
+    {x64:   `lea \\(%rdi,%rdi,1\\), %rax`,
      x86:   `movl 0x14\\(%rbp\\), %edx
              movl 0x10\\(%rbp\\), %eax
              add %eax, %eax
@@ -153,9 +148,7 @@ codegenTestMultiplatform_adhoc(
     `(module (func (export "mul64_fourL") (param $p1 i64) (result i64)
        (i64.mul (i64.const 4) (local.get $p1))))`,
     "mul64_fourL",
-    {x64:   `mov %rdi, %rcx
-             mov %rcx, %rax
-             shl \\$0x02, %rax`,
+    {x64:   `lea \\(,%rdi,4\\), %rax`,
      x86:   `movl 0x14\\(%rbp\\), %edx
              movl 0x10\\(%rbp\\), %eax
              shld \\$0x02, %eax, %edx
@@ -189,9 +182,7 @@ codegenTestMultiplatform_adhoc(
     `(module (func (export "mul64_zeroR") (param $p1 i64) (result i64)
        (i64.mul (local.get $p1) (i64.const 0))))`,
     "mul64_zeroR",
-    {x64:   `mov %rdi, %rcx
-             mov %rcx, %rax
-             xor %rax, %rax`,     // REX.W is redundant
+    {x64:   `xor %rax, %rax`,     // REX.W is redundant
      x86:   `xor %eax, %eax
              xor %edx, %edx`,
      arm64: `mov x0, xzr`,
@@ -265,9 +256,7 @@ codegenTestMultiplatform_adhoc(
     `(module (func (export "mul64_twoR") (param $p1 i64) (result i64)
        (i64.mul (local.get $p1) (i64.const 2))))`,
     "mul64_twoR",
-    {x64:   `mov %rdi, %rcx
-             mov %rcx, %rax
-             add %rax, %rax`,
+    {x64:   `lea \\(%rdi,%rdi,1\\), %rax`,
      x86:   `movl 0x14\\(%rbp\\), %edx
              movl 0x10\\(%rbp\\), %eax
              add %eax, %eax
@@ -293,9 +282,7 @@ codegenTestMultiplatform_adhoc(
     `(module (func (export "mul64_fourR") (param $p1 i64) (result i64)
        (i64.mul (local.get $p1) (i64.const 4))))`,
     "mul64_fourR",
-    {x64:   `mov %rdi, %rcx
-             mov %rcx, %rax
-             shl \\$0x02, %rax`,
+    {x64:   `lea \\(,%rdi,4\\), %rax`,
      x86:   `movl 0x14\\(%rbp\\), %edx
              movl 0x10\\(%rbp\\), %eax
              shld \\$0x02, %eax, %edx
diff --git a/js/src/jit/LIROps.yaml b/js/src/jit/LIROps.yaml
@@ -1351,7 +1351,7 @@
   operands:
     lhs: Int64
     rhs: Int64
-#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM)
+#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_ARM)
   num_temps: 1
 #endif
   defer_init: true
diff --git a/js/src/jit/x64/CodeGenerator-x64.cpp b/js/src/jit/x64/CodeGenerator-x64.cpp
@@ -123,41 +123,77 @@ void CodeGenerator::visitUnbox(LUnbox* unbox) {
 }
 
 void CodeGenerator::visitMulI64(LMulI64* lir) {
-  Register64 lhs = ToRegister64(lir->lhs());
+  Register lhs = ToRegister64(lir->lhs()).reg;
   LInt64Allocation rhs = lir->rhs();
-
-  MOZ_ASSERT(ToOutRegister64(lir) == lhs);
+  Register out = ToOutRegister64(lir).reg;
 
   if (IsConstant(rhs)) {
     int64_t constant = ToInt64(rhs);
     switch (constant) {
       case -1:
-        masm.neg64(lhs);
-        return;
+        if (lhs != out) {
+          masm.movq(lhs, out);
+        }
+        masm.negq(out);
+        break;
       case 0:
-        masm.xor64(lhs, lhs);
-        return;
+        masm.xorq(out, out);
+        break;
       case 1:
-        // nop
-        return;
+        if (lhs != out) {
+          masm.movq(lhs, out);
+        }
+        break;
       case 2:
-        masm.add64(lhs, lhs);
-        return;
-      default:
-        if (constant > 0) {
-          // Use shift if constant is power of 2.
-          int32_t shift = mozilla::FloorLog2(constant);
-          if (int64_t(1) << shift == constant) {
-            masm.lshift64(Imm32(shift), lhs);
-            return;
+        if (lhs == out) {
+          masm.addq(lhs, lhs);
+        } else {
+          masm.lea(Operand(lhs, lhs, TimesOne), out);
+        }
+        break;
+      case 3:
+        masm.lea(Operand(lhs, lhs, TimesTwo), out);
+        break;
+      case 4:
+        if (lhs == out) {
+          masm.shlq(Imm32(2), lhs);
+        } else {
+          masm.lea(Operand(lhs, TimesFour, 0), out);
+        }
+        break;
+      case 5:
+        masm.lea(Operand(lhs, lhs, TimesFour), out);
+        break;
+      case 8:
+        if (lhs == out) {
+          masm.shlq(Imm32(3), lhs);
+        } else {
+          masm.lea(Operand(lhs, TimesEight, 0), out);
+        }
+        break;
+      case 9:
+        masm.lea(Operand(lhs, lhs, TimesEight), out);
+        break;
+      default: {
+        // Use shift if constant is power of 2.
+        int32_t shift = mozilla::FloorLog2(constant);
+        if (constant > 0 && (1 << shift) == constant) {
+          if (lhs != out) {
+            masm.movq(lhs, out);
           }
+          masm.shlq(Imm32(shift), out);
+        } else if (int32_t(constant) == constant) {
+          masm.imulq(Imm32(constant), lhs, out);
+        } else {
+          MOZ_ASSERT(out == lhs);
+          masm.mul64(Imm64(constant), Register64(lhs));
         }
-        Register temp = ToTempRegisterOrInvalid(lir->temp0());
-        masm.mul64(Imm64(constant), lhs, temp);
+        break;
+      }
     }
   } else {
-    Register temp = ToTempRegisterOrInvalid(lir->temp0());
-    masm.mul64(ToOperandOrRegister64(rhs), lhs, temp);
+    MOZ_ASSERT(out == lhs);
+    masm.imulq(ToOperandOrRegister64(rhs), lhs);
   }
 }
 
diff --git a/js/src/jit/x64/Lowering-x64.cpp b/js/src/jit/x64/Lowering-x64.cpp
@@ -62,12 +62,22 @@ void LIRGeneratorX64::lowerForALUInt64(
 
 void LIRGeneratorX64::lowerForMulInt64(LMulI64* ins, MMul* mir,
                                        MDefinition* lhs, MDefinition* rhs) {
-  // X64 doesn't need a temp for 64bit multiplication.
+  // No input reuse needed when we can use imulq with an int32 immediate.
+  bool reuseInput = true;
+  if (rhs->isConstant()) {
+    int64_t constant = rhs->toConstant()->toInt64();
+    reuseInput = int32_t(constant) != constant;
+  }
+
   ins->setLhs(useInt64RegisterAtStart(lhs));
   ins->setRhs(willHaveDifferentLIRNodes(lhs, rhs)
                   ? useInt64OrConstant(rhs)
                   : useInt64OrConstantAtStart(rhs));
-  defineInt64ReuseInput(ins, mir, 0);
+  if (reuseInput) {
+    defineInt64ReuseInput(ins, mir, 0);
+  } else {
+    defineInt64(ins, mir);
+  }
 }
 
 void LIRGenerator::visitBox(MBox* box) {

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE

M	js/src/jit-test/tests/wasm/binop-x64-ion-codegen.js	\|	50	+++++++++++++++++++++++++++++++++++++++++++-------
M	js/src/jit-test/tests/wasm/ion-adhoc-multiplatform.js	\|	43	+++++++++++++++----------------------------
M	js/src/jit/LIROps.yaml	\|	2	+-
M	js/src/jit/x64/CodeGenerator-x64.cpp	\|	80	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
M	js/src/jit/x64/Lowering-x64.cpp	\|	14	++++++++++++--