[ tor-browser ].git.dasho

commit ae14d1de4d426ad886ccccfa637780f7393b1936
parent 1322860b6d3a6a07156e32f59e97fb38782d53b5
Author: Alexandru Marc <amarc@mozilla.com>
Date:   Mon, 27 Oct 2025 12:34:31 +0200

Revert (Bug 1996346, Bug 1996345, Bug 1996344, Bug 1996343) for causing SM bustages in binop-x64-ion-codegen.js

This reverts commit b48243d060f0e0bd84e66d795ddcec182214b3dc.

Revert "Bug 1996346 - Part 3: Split lowering and codegen for LShiftI64. r=spidermonkey-reviewers,iain"

This reverts commit 1949d39c47bf940312a88072d90d0e49775d21e7.

Revert "Bug 1996346 - Part 2: Relax register constraints for UrshD on x86-shared. r=spidermonkey-reviewers,iain"

This reverts commit f1ea8fcc82f116506a7b3d2edd8329ef91985294.

Revert "Bug 1996346 - Part 1: Don't reuse input for shift instructions when BMI2 is available. r=spidermonkey-reviewers,iain"

This reverts commit ef2f301bc14372d2a2072dd546d3a63f38babad5.

Revert "Bug 1996345 - Part 4: Use three operand imul instruction for Int64 when possible. r=spidermonkey-reviewers,iain"

This reverts commit 130494460295b72c62f2d7c6763cb00c89189a85.

Revert "Bug 1996345 - Part 3: Support MEM_SCALE_NOBASE for 64-bit lea. r=spidermonkey-reviewers,iain"

This reverts commit 979b5c3d12aa2503aabdff77ec4c8f71ed3b9d37.

Revert "Bug 1996345 - Part 2: Split codegen for CodeGenerator::visitMulI64. r=spidermonkey-reviewers,iain"

This reverts commit e5cbfb4bc69ecd1d72e44df4601b32601c5fc9bb.

Revert "Bug 1996345 - Part 1: Use three operand imul instruction when possible. r=spidermonkey-reviewers,iain"

This reverts commit 86cef46aa081c087e3129bb5e012311b9494500d.

Revert "Bug 1996344 - Part 3: Remove unused ToOperand method. r=spidermonkey-reviewers,iain"

This reverts commit 7b29457f213ccab25b5e2ea05bae3210936f0c57.

Revert "Bug 1996344 - Part 2: Clean-up ALU operations codegen for x86-shared. r=spidermonkey-reviewers,iain"

This reverts commit 1f3652d44b7e529155e4a967b88b556dd4b76375.

Revert "Bug 1996344 - Part 1: Support lea instruction when constant is left-hand side operand. r=spidermonkey-reviewers,iain"

This reverts commit ab22f181a2f5f330406c5451c46d5fbfea842651.

Revert "Bug 1996343: Use AVX encoding for unary lowerForFPU. r=spidermonkey-reviewers,iain"

This reverts commit 11b4c69ef331696376eaecba6b00b3625d13d4a5.

Diffstat:
M js/src/jit-test/tests/wasm/binop-x64-ion-codegen.js  | 58 +++++++++++-----------------------------------------------
M js/src/jit-test/tests/wasm/ion-adhoc-multiplatform.js  | 69 ++++++++++++++++++++++++++++++++++++++++++++++++---------------------
M js/src/jit/LIROps.yaml  | 2 +-
M js/src/jit/x64/Assembler-x64.h  | 3 ---
M js/src/jit/x64/BaseAssembler-x64.h  | 6 ------
M js/src/jit/x64/CodeGenerator-x64.cpp  | 196 -------------------------------------------------------------------------------
M js/src/jit/x64/Lowering-x64.cpp  | 57 ++-------------------------------------------------------
M js/src/jit/x64/Lowering-x64.h  | 4 ----
M js/src/jit/x86-shared/BaseAssembler-x86-shared.h  | 8 --------
M js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp  | 351 +++++++++++++++++++++++++++++++++++++++++++++----------------------------------
M js/src/jit/x86-shared/CodeGenerator-x86-shared.h  | 1 +
M js/src/jit/x86-shared/Lowering-x86-shared.cpp  | 111 ++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
M js/src/jit/x86-shared/Lowering-x86-shared.h  | 4 ++++
M js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h  | 7 +++++--
M js/src/jit/x86/CodeGenerator-x86.cpp  | 146 -------------------------------------------------------------------------------
M js/src/jit/x86/Lowering-x86.cpp  | 34 ----------------------------------
M js/src/jit/x86/Lowering-x86.h  | 4 ----

17 files changed, 336 insertions(+), 725 deletions(-)
diff --git a/js/src/jit-test/tests/wasm/binop-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/binop-x64-ion-codegen.js
@@ -37,7 +37,7 @@ let zero32 =
 codegenTestX64_adhoc(
     zero32,
     'f',
-    'xor %eax, %eax');
+    'xor %eax, %eax', {no_prefix:true});
 assertEq(wasmEvalText(zero32).exports.f(-37), 0)
 assertEq(wasmEvalText(zero32).exports.f(42), 0)
 
@@ -47,7 +47,7 @@ let zero64 = `(module
 codegenTestX64_adhoc(
     zero64,
     'f',
-    'xor %rax, %rax');
+    'xor %rax, %rax', {no_prefix:true});
 assertEq(wasmEvalText(zero64).exports.f(-37000000000n), 0n)
 assertEq(wasmEvalText(zero64).exports.f(42000000000n), 0n)
 
@@ -74,7 +74,7 @@ codegenTestX64_adhoc(
 assertEq(wasmEvalText(one64).exports.f(-37000000000n), -37000000000n)
 assertEq(wasmEvalText(one64).exports.f(42000000000n), 42000000000n)
 
-// Test that multiplication by two yields lea
+// Test that multiplication by two yields an add
 
 let double32 =
     `(module
@@ -83,7 +83,7 @@ let double32 =
 codegenTestX64_adhoc(
     double32,
     'f',
-    'lea \\(%rdi,%rdi,1\\), %eax');
+    'add %eax, %eax', {no_prefix:true});
 assertEq(wasmEvalText(double32).exports.f(-37), -74)
 assertEq(wasmEvalText(double32).exports.f(42), 84)
 
@@ -93,11 +93,11 @@ let double64 = `(module
 codegenTestX64_adhoc(
     double64,
     'f',
-    'lea \\(%rdi,%rdi,1\\), %rax');
+    'add %rax, %rax', {no_prefix:true});
 assertEq(wasmEvalText(double64).exports.f(-37000000000n), -74000000000n)
 assertEq(wasmEvalText(double64).exports.f(42000000000n), 84000000000n)
 
-// Test that multiplication by four yields lea
+// Test that multiplication by four yields a shift
 
 let quad32 =
     `(module
@@ -106,7 +106,7 @@ let quad32 =
 codegenTestX64_adhoc(
     quad32,
     'f',
-    'lea \\(,%rdi,4\\), %eax');
+    'shl \\$0x02, %eax', {no_prefix:true});
 assertEq(wasmEvalText(quad32).exports.f(-37), -148)
 assertEq(wasmEvalText(quad32).exports.f(42), 168)
 
@@ -116,11 +116,11 @@ let quad64 = `(module
 codegenTestX64_adhoc(
     quad64,
     'f',
-    'lea \\(,%rdi,4\\), %rax');
+    'shl \\$0x02, %rax', {no_prefix:true});
 assertEq(wasmEvalText(quad64).exports.f(-37000000000n), -148000000000n)
 assertEq(wasmEvalText(quad64).exports.f(42000000000n), 168000000000n)
 
-// Test that multiplication by five yields lea
+// Test that multiplication by five yields a multiply
 
 let quint32 =
     `(module
@@ -129,7 +129,7 @@ let quint32 =
 codegenTestX64_adhoc(
     quint32,
     'f',
-    'lea \\(%rdi,%rdi,4\\), %eax');
+    'imul \\$0x05, %eax, %eax', {no_prefix:true});
 assertEq(wasmEvalText(quint32).exports.f(-37), -37*5)
 assertEq(wasmEvalText(quint32).exports.f(42), 42*5)
 
@@ -139,46 +139,10 @@ let quint64 = `(module
 codegenTestX64_adhoc(
     quint64,
     'f',
-    `lea \\(%rdi,%rdi,4\\), %rax`)
+    `imul \\$0x05, %rax, %rax`, {no_prefix:true})
 assertEq(wasmEvalText(quint64).exports.f(-37000000000n), -37000000000n*5n)
 assertEq(wasmEvalText(quint64).exports.f(42000000000n), 42000000000n*5n)
 
-// Test that multiplication by six yields imul
-
-let sext32 =
-    `(module
-       (func (export "f") (param i32) (result i32)
-         (i32.mul (local.get 0) (i32.const 6))))`;
-codegenTestX64_adhoc(
-    sext32,
-    'f',
-    'imul \\$0x06, %edi, %eax');
-assertEq(wasmEvalText(sext32).exports.f(-37), -37*6)
-assertEq(wasmEvalText(sext32).exports.f(42), 42*6)
-
-let sext64 = `(module
-       (func (export "f") (param i64) (result i64)
-         (i64.mul (local.get 0) (i64.const 6))))`
-codegenTestX64_adhoc(
-    sext64,
-    'f',
-    `imul \\$0x06, %rdi, %rax`)
-assertEq(wasmEvalText(sext64).exports.f(-37000000000n), -37000000000n*6n)
-assertEq(wasmEvalText(sext64).exports.f(42000000000n), 42000000000n*6n)
-
-// Test that multiplication by UINT32_MAX yields imul
-
-let uint32max64 = `(module
-       (func (export "f") (param i64) (result i64)
-         (i64.mul (local.get 0) (i64.const 0xffffffff))))`
-codegenTestX64_adhoc(
-    uint32max64,
-    'f',
-    `mov \\$-0x01, %r11d
-     imul %r11, %rax`, {no_prefix:true})
-assertEq(wasmEvalText(uint32max64).exports.f(-37000000000n), BigInt.asIntN(64, -37000000000n*0xffffffffn))
-assertEq(wasmEvalText(uint32max64).exports.f(42000000000n), BigInt.asIntN(64, 42000000000n*0xffffffffn))
-
 // Test that 0-n yields negation.
 
 let subneg32 =
diff --git a/js/src/jit-test/tests/wasm/ion-adhoc-multiplatform.js b/js/src/jit-test/tests/wasm/ion-adhoc-multiplatform.js
@@ -30,7 +30,18 @@ codegenTestMultiplatform_adhoc(
     `(module (func (export "mul32_zeroL") (param $p1 i32) (result i32)
        (i32.mul (i32.const 0) (local.get $p1))))`,
     "mul32_zeroL",
-    {x64:   `xor %eax, %eax`,
+    {x64:   // FIXME move folding to MIR level
+            // First we move edi to eax unnecessarily via ecx (bug 1752520),
+            // then we overwrite eax.  Presumably because the folding
+            // 0 * x => 0 is done at the LIR level, not the MIR level, hence
+            // the now-pointless WasmParameter node is not DCE'd away, since
+            // DCE only happens at the MIR level.  In fact all targets suffer
+            // from the latter problem, but on x86 no_prefix_x86:true
+            // hides it, and on arm32/64 the pointless move is correctly
+            // transformed by RA into a no-op.
+            `mov %edi, %ecx
+             mov %ecx, %eax
+             xor %eax, %eax`,
      x86:   `xor %eax, %eax`,
      arm64: `mov w0, wzr`,
      arm:   `mov r0, #0`},
@@ -40,8 +51,11 @@ codegenTestMultiplatform_adhoc(
     `(module (func (export "mul64_zeroL") (param $p1 i64) (result i64)
        (i64.mul (i64.const 0) (local.get $p1))))`,
     "mul64_zeroL",
-    // FIXME zero-creation insns could be improved
-    {x64:   `xor %rax, %rax`,     // REX.W is redundant
+    // FIXME folding happened, zero-creation insns could be improved
+    {x64:   // Same shenanigans as above.  Also, on xor, REX.W is redundant.
+            `mov %rdi, %rcx
+             mov %rcx, %rax
+             xor %rax, %rax`,
      x86:   `xor %eax, %eax
              xor %edx, %edx`,
      arm64: `mov x0, xzr`,
@@ -54,14 +68,7 @@ codegenTestMultiplatform_adhoc(
     `(module (func (export "mul32_oneL") (param $p1 i32) (result i32)
        (i32.mul (i32.const 1) (local.get $p1))))`,
     "mul32_oneL",
-    {x64:   // We move edi to eax unnecessarily via ecx (bug 1752520).
-            // Presumably because the folding 1 * x => x is done at the LIR
-            // level, not the MIR level, hence the now-pointless WasmParameter
-            // node is not DCE'd away, since DCE only happens at the MIR level.
-            // In fact all targets suffer from the latter problem, but on x86
-            // no_prefix_x86:true hides it, and on arm32/64 the pointless move
-            // is correctly transformed by RA into a no-op.
-            `mov %edi, %ecx
+    {x64:   `mov %edi, %ecx
              mov %ecx, %eax`,
      x86:   `movl 0x10\\(%rbp\\), %eax`,
      arm64: ``,
@@ -111,7 +118,9 @@ codegenTestMultiplatform_adhoc(
     `(module (func (export "mul32_twoL") (param $p1 i32) (result i32)
        (i32.mul (i32.const 2) (local.get $p1))))`,
     "mul32_twoL",
-    {x64:   `lea \\(%rdi,%rdi,1\\), %eax`,
+    {x64:   `mov %edi, %ecx
+             mov %ecx, %eax
+             add %eax, %eax`,
      x86:   `movl 0x10\\(%rbp\\), %eax
              add %eax, %eax`,
      arm64: `add w0, w0, w0`,
@@ -122,7 +131,9 @@ codegenTestMultiplatform_adhoc(
     `(module (func (export "mul64_twoL") (param $p1 i64) (result i64)
        (i64.mul (i64.const 2) (local.get $p1))))`,
     "mul64_twoL",
-    {x64:   `lea \\(%rdi,%rdi,1\\), %rax`,
+    {x64:   `mov %rdi, %rcx
+             mov %rcx, %rax
+             add %rax, %rax`,
      x86:   `movl 0x14\\(%rbp\\), %edx
              movl 0x10\\(%rbp\\), %eax
              add %eax, %eax
@@ -137,7 +148,9 @@ codegenTestMultiplatform_adhoc(
     `(module (func (export "mul32_fourL") (param $p1 i32) (result i32)
        (i32.mul (i32.const 4) (local.get $p1))))`,
     "mul32_fourL",
-    {x64:   `lea \\(,%rdi,4\\), %eax`,
+    {x64:   `mov %edi, %ecx
+             mov %ecx, %eax
+             shl \\$0x02, %eax`,
      x86:   `movl 0x10\\(%rbp\\), %eax
              shl \\$0x02, %eax`,
      arm64: `lsl w0, w0, #2`,
@@ -148,7 +161,9 @@ codegenTestMultiplatform_adhoc(
     `(module (func (export "mul64_fourL") (param $p1 i64) (result i64)
        (i64.mul (i64.const 4) (local.get $p1))))`,
     "mul64_fourL",
-    {x64:   `lea \\(,%rdi,4\\), %rax`,
+    {x64:   `mov %rdi, %rcx
+             mov %rcx, %rax
+             shl \\$0x02, %rax`,
      x86:   `movl 0x14\\(%rbp\\), %edx
              movl 0x10\\(%rbp\\), %eax
              shld \\$0x02, %eax, %edx
@@ -172,7 +187,9 @@ codegenTestMultiplatform_adhoc(
     `(module (func (export "mul32_zeroR") (param $p1 i32) (result i32)
        (i32.mul (local.get $p1) (i32.const 0))))`,
     "mul32_zeroR",
-    {x64:   `xor %eax, %eax`,
+    {x64:   `mov %edi, %ecx
+             mov %ecx, %eax
+             xor %eax, %eax`,
      x86:   `xor %eax, %eax`,
      arm64: `mov w0, wzr`,
      arm:   `mov r0, #0`},
@@ -182,7 +199,9 @@ codegenTestMultiplatform_adhoc(
     `(module (func (export "mul64_zeroR") (param $p1 i64) (result i64)
        (i64.mul (local.get $p1) (i64.const 0))))`,
     "mul64_zeroR",
-    {x64:   `xor %rax, %rax`,     // REX.W is redundant
+    {x64:   `mov %rdi, %rcx
+             mov %rcx, %rax
+             xor %rax, %rax`,     // REX.W is redundant
      x86:   `xor %eax, %eax
              xor %edx, %edx`,
      arm64: `mov x0, xzr`,
@@ -245,7 +264,9 @@ codegenTestMultiplatform_adhoc(
     `(module (func (export "mul32_twoR") (param $p1 i32) (result i32)
        (i32.mul (local.get $p1) (i32.const 2))))`,
     "mul32_twoR",
-    {x64:   `lea \\(%rdi,%rdi,1\\), %eax`,
+    {x64:   `mov %edi, %ecx
+             mov %ecx, %eax
+             add %eax, %eax`,
      x86:   `movl 0x10\\(%rbp\\), %eax
              add %eax, %eax`,
      arm64: `add w0, w0, w0`,
@@ -256,7 +277,9 @@ codegenTestMultiplatform_adhoc(
     `(module (func (export "mul64_twoR") (param $p1 i64) (result i64)
        (i64.mul (local.get $p1) (i64.const 2))))`,
     "mul64_twoR",
-    {x64:   `lea \\(%rdi,%rdi,1\\), %rax`,
+    {x64:   `mov %rdi, %rcx
+             mov %rcx, %rax
+             add %rax, %rax`,
      x86:   `movl 0x14\\(%rbp\\), %edx
              movl 0x10\\(%rbp\\), %eax
              add %eax, %eax
@@ -271,7 +294,9 @@ codegenTestMultiplatform_adhoc(
     `(module (func (export "mul32_fourR") (param $p1 i32) (result i32)
        (i32.mul (local.get $p1) (i32.const 4))))`,
     "mul32_fourR",
-    {x64:   `lea \\(,%rdi,4\\), %eax`,
+    {x64:   `mov %edi, %ecx
+             mov %ecx, %eax
+             shl \\$0x02, %eax`,
      x86:   `movl 0x10\\(%rbp\\), %eax
              shl \\$0x02, %eax`,
      arm64: `lsl w0, w0, #2`,
@@ -282,7 +307,9 @@ codegenTestMultiplatform_adhoc(
     `(module (func (export "mul64_fourR") (param $p1 i64) (result i64)
        (i64.mul (local.get $p1) (i64.const 4))))`,
     "mul64_fourR",
-    {x64:   `lea \\(,%rdi,4\\), %rax`,
+    {x64:   `mov %rdi, %rcx
+             mov %rcx, %rax
+             shl \\$0x02, %rax`,
      x86:   `movl 0x14\\(%rbp\\), %edx
              movl 0x10\\(%rbp\\), %eax
              shld \\$0x02, %eax, %edx
diff --git a/js/src/jit/LIROps.yaml b/js/src/jit/LIROps.yaml
@@ -1351,7 +1351,7 @@
   operands:
     lhs: Int64
     rhs: Int64
-#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_ARM)
+#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM)
   num_temps: 1
 #endif
   defer_init: true
diff --git a/js/src/jit/x64/Assembler-x64.h b/js/src/jit/x64/Assembler-x64.h
@@ -1043,9 +1043,6 @@ class Assembler : public AssemblerX86Shared {
         masm.leaq_mr(src.disp(), src.base(), src.index(), src.scale(),
                      dest.encoding());
         break;
-      case Operand::MEM_SCALE_NOBASE:
-        masm.leaq_mr(src.disp(), src.index(), src.scale(), dest.encoding());
-        break;
       default:
         MOZ_CRASH("unexepcted operand kind");
     }
diff --git a/js/src/jit/x64/BaseAssembler-x64.h b/js/src/jit/x64/BaseAssembler-x64.h
@@ -711,12 +711,6 @@ class BaseAssemblerX64 : public BaseAssembler {
     m_formatter.oneByteOp64(OP_LEA, offset, base, index, scale, dst);
   }
 
-  void leaq_mr(int32_t offset, RegisterID index, int scale, RegisterID dst) {
-    spew("leaq       " MEM_os ", %s", ADDR_os(offset, index, scale),
-         GPReg64Name(dst));
-    m_formatter.oneByteOp64_disp32(OP_LEA, offset, index, scale, dst);
-  }
-
   void movq_i32m(int32_t imm, int32_t offset, RegisterID base) {
     spew("movq       $%d, " MEM_ob, imm, ADDR_ob(offset, base));
     m_formatter.oneByteOp64(OP_GROUP11_EvIz, offset, base, GROUP11_MOV);
diff --git a/js/src/jit/x64/CodeGenerator-x64.cpp b/js/src/jit/x64/CodeGenerator-x64.cpp
@@ -122,81 +122,6 @@ void CodeGenerator::visitUnbox(LUnbox* unbox) {
   }
 }
 
-void CodeGenerator::visitMulI64(LMulI64* lir) {
-  Register lhs = ToRegister64(lir->lhs()).reg;
-  LInt64Allocation rhs = lir->rhs();
-  Register out = ToOutRegister64(lir).reg;
-
-  if (IsConstant(rhs)) {
-    int64_t constant = ToInt64(rhs);
-    switch (constant) {
-      case -1:
-        if (lhs != out) {
-          masm.movq(lhs, out);
-        }
-        masm.negq(out);
-        break;
-      case 0:
-        masm.xorq(out, out);
-        break;
-      case 1:
-        if (lhs != out) {
-          masm.movq(lhs, out);
-        }
-        break;
-      case 2:
-        if (lhs == out) {
-          masm.addq(lhs, lhs);
-        } else {
-          masm.lea(Operand(lhs, lhs, TimesOne), out);
-        }
-        break;
-      case 3:
-        masm.lea(Operand(lhs, lhs, TimesTwo), out);
-        break;
-      case 4:
-        if (lhs == out) {
-          masm.shlq(Imm32(2), lhs);
-        } else {
-          masm.lea(Operand(lhs, TimesFour, 0), out);
-        }
-        break;
-      case 5:
-        masm.lea(Operand(lhs, lhs, TimesFour), out);
-        break;
-      case 8:
-        if (lhs == out) {
-          masm.shlq(Imm32(3), lhs);
-        } else {
-          masm.lea(Operand(lhs, TimesEight, 0), out);
-        }
-        break;
-      case 9:
-        masm.lea(Operand(lhs, lhs, TimesEight), out);
-        break;
-      default: {
-        // Use shift if constant is power of 2.
-        int32_t shift = mozilla::FloorLog2(constant);
-        if (constant > 0 && (1 << shift) == constant) {
-          if (lhs != out) {
-            masm.movq(lhs, out);
-          }
-          masm.shlq(Imm32(shift), out);
-        } else if (int32_t(constant) == constant) {
-          masm.imulq(Imm32(constant), lhs, out);
-        } else {
-          MOZ_ASSERT(out == lhs);
-          masm.mul64(Imm64(constant), Register64(lhs));
-        }
-        break;
-      }
-    }
-  } else {
-    MOZ_ASSERT(out == lhs);
-    masm.imulq(ToOperandOrRegister64(rhs), lhs);
-  }
-}
-
 void CodeGenerator::visitDivOrModI64(LDivOrModI64* lir) {
   Register lhs = ToRegister(lir->lhs());
   Register rhs = ToRegister(lir->rhs());
@@ -305,127 +230,6 @@ void CodeGeneratorX64::emitBigIntPtrMod(LBigIntPtrMod* ins, Register dividend,
   masm.idivq(divisor);
 }
 
-void CodeGenerator::visitShiftIntPtr(LShiftIntPtr* ins) {
-  Register lhs = ToRegister(ins->lhs());
-  const LAllocation* rhs = ins->rhs();
-  Register out = ToRegister(ins->output());
-
-  if (rhs->isConstant()) {
-    MOZ_ASSERT(out == lhs);
-
-    int32_t shift = ToIntPtr(rhs) & 0x3f;
-    switch (ins->bitop()) {
-      case JSOp::Lsh:
-        if (shift) {
-          masm.lshiftPtr(Imm32(shift), lhs);
-        }
-        break;
-      case JSOp::Rsh:
-        if (shift) {
-          masm.rshiftPtrArithmetic(Imm32(shift), lhs);
-        }
-        break;
-      case JSOp::Ursh:
-        if (shift) {
-          masm.rshiftPtr(Imm32(shift), lhs);
-        }
-        break;
-      default:
-        MOZ_CRASH("Unexpected shift op");
-    }
-  } else {
-    Register shift = ToRegister(rhs);
-    MOZ_ASSERT_IF(out != lhs, Assembler::HasBMI2());
-
-    switch (ins->bitop()) {
-      case JSOp::Lsh:
-        if (out != lhs) {
-          masm.shlxq(lhs, shift, out);
-        } else {
-          masm.lshiftPtr(shift, lhs);
-        }
-        break;
-      case JSOp::Rsh:
-        if (out != lhs) {
-          masm.sarxq(lhs, shift, out);
-        } else {
-          masm.rshiftPtrArithmetic(shift, lhs);
-        }
-        break;
-      case JSOp::Ursh:
-        if (out != lhs) {
-          masm.shrxq(lhs, shift, out);
-        } else {
-          masm.rshiftPtr(shift, lhs);
-        }
-        break;
-      default:
-        MOZ_CRASH("Unexpected shift op");
-    }
-  }
-}
-
-void CodeGenerator::visitShiftI64(LShiftI64* lir) {
-  Register lhs = ToRegister64(lir->lhs()).reg;
-  const LAllocation* rhs = lir->rhs();
-  Register out = ToOutRegister64(lir).reg;
-
-  if (rhs->isConstant()) {
-    MOZ_ASSERT(out == lhs);
-
-    int32_t shift = int32_t(rhs->toConstant()->toInt64() & 0x3F);
-    switch (lir->bitop()) {
-      case JSOp::Lsh:
-        if (shift) {
-          masm.lshiftPtr(Imm32(shift), lhs);
-        }
-        break;
-      case JSOp::Rsh:
-        if (shift) {
-          masm.rshiftPtrArithmetic(Imm32(shift), lhs);
-        }
-        break;
-      case JSOp::Ursh:
-        if (shift) {
-          masm.rshiftPtr(Imm32(shift), lhs);
-        }
-        break;
-      default:
-        MOZ_CRASH("Unexpected shift op");
-    }
-    return;
-  }
-
-  Register shift = ToRegister(rhs);
-  MOZ_ASSERT_IF(out != lhs, Assembler::HasBMI2());
-
-  switch (lir->bitop()) {
-    case JSOp::Lsh:
-      if (out != lhs) {
-        masm.shlxq(lhs, shift, out);
-      } else {
-        masm.lshiftPtr(shift, lhs);
-      }
-      break;
-    case JSOp::Rsh:
-      if (out != lhs) {
-        masm.sarxq(lhs, shift, out);
-      } else {
-        masm.rshiftPtrArithmetic(shift, lhs);
-      }
-      break;
-    case JSOp::Ursh:
-      if (out != lhs) {
-        masm.shrxq(lhs, shift, out);
-      } else {
-        masm.rshiftPtr(shift, lhs);
-      }
-      break;
-    default:
-      MOZ_CRASH("Unexpected shift op");
-  }
-}
-
 void CodeGenerator::visitAtomicLoad64(LAtomicLoad64* lir) {
   Register elements = ToRegister(lir->elements());
   Register64 out = ToOutRegister64(lir);
diff --git a/js/src/jit/x64/Lowering-x64.cpp b/js/src/jit/x64/Lowering-x64.cpp
@@ -62,67 +62,14 @@ void LIRGeneratorX64::lowerForALUInt64(
 
 void LIRGeneratorX64::lowerForMulInt64(LMulI64* ins, MMul* mir,
                                        MDefinition* lhs, MDefinition* rhs) {
-  // No input reuse needed when we can use imulq with an int32 immediate.
-  bool reuseInput = true;
-  if (rhs->isConstant()) {
-    int64_t constant = rhs->toConstant()->toInt64();
-    reuseInput = int32_t(constant) != constant;
-  }
-
+  // X64 doesn't need a temp for 64bit multiplication.
   ins->setLhs(useInt64RegisterAtStart(lhs));
   ins->setRhs(willHaveDifferentLIRNodes(lhs, rhs)
                   ? useInt64OrConstant(rhs)
                   : useInt64OrConstantAtStart(rhs));
-  if (reuseInput) {
-    defineInt64ReuseInput(ins, mir, 0);
-  } else {
-    defineInt64(ins, mir);
-  }
-}
-
-template <class LInstr>
-void LIRGeneratorX64::lowerForShiftInt64(LInstr* ins, MDefinition* mir,
-                                         MDefinition* lhs, MDefinition* rhs) {
-  if constexpr (std::is_same_v<LInstr, LShiftI64>) {
-    LAllocation rhsAlloc;
-    if (rhs->isConstant()) {
-      rhsAlloc = useOrConstantAtStart(rhs);
-    } else if (Assembler::HasBMI2()) {
-      rhsAlloc = useRegisterAtStart(rhs);
-    } else {
-      rhsAlloc = useShiftRegister(rhs);
-    }
-
-    ins->setLhs(useInt64RegisterAtStart(lhs));
-    ins->setRhs(rhsAlloc);
-    if (rhs->isConstant() || !Assembler::HasBMI2()) {
-      defineInt64ReuseInput(ins, mir, LShiftI64::LhsIndex);
-    } else {
-      defineInt64(ins, mir);
-    }
-  } else {
-    LAllocation rhsAlloc;
-    if (rhs->isConstant()) {
-      rhsAlloc = useOrConstantAtStart(rhs);
-    } else {
-      rhsAlloc = useFixed(rhs, rcx);
-    }
-
-    ins->setInput(useInt64RegisterAtStart(lhs));
-    ins->setCount(rhsAlloc);
-    defineInt64ReuseInput(ins, mir, LRotateI64::InputIndex);
-  }
+  defineInt64ReuseInput(ins, mir, 0);
 }
 
-template void LIRGeneratorX64::lowerForShiftInt64(LShiftI64* ins,
-                                                  MDefinition* mir,
-                                                  MDefinition* lhs,
-                                                  MDefinition* rhs);
-template void LIRGeneratorX64::lowerForShiftInt64(LRotateI64* ins,
-                                                  MDefinition* mir,
-                                                  MDefinition* lhs,
-                                                  MDefinition* rhs);
-
 void LIRGenerator::visitBox(MBox* box) {
   MDefinition* opd = box->getOperand(0);
 
diff --git a/js/src/jit/x64/Lowering-x64.h b/js/src/jit/x64/Lowering-x64.h
@@ -31,10 +31,6 @@ class LIRGeneratorX64 : public LIRGeneratorX86Shared {
   void lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs,
                         MDefinition* rhs);
 
-  template <class LInstr>
-  void lowerForShiftInt64(LInstr* ins, MDefinition* mir, MDefinition* lhs,
-                          MDefinition* rhs);
-
   // Returns a box allocation. reg2 is ignored on 64-bit platforms.
   LBoxAllocation useBoxFixed(MDefinition* mir, Register reg1, Register,
                              bool useAtStart = false);
diff --git a/js/src/jit/x86-shared/BaseAssembler-x86-shared.h b/js/src/jit/x86-shared/BaseAssembler-x86-shared.h
@@ -5998,14 +5998,6 @@ class BaseAssembler : public GenericAssembler {
       memoryModRM(offset, base, index, scale, reg);
     }
 
-    void oneByteOp64_disp32(OneByteOpcodeID opcode, int32_t offset,
-                            RegisterID index, int scale, int reg) {
-      m_buffer.ensureSpace(MaxInstructionSize);
-      emitRexW(reg, index, 0);
-      m_buffer.putByteUnchecked(opcode);
-      memoryModRM_disp32(offset, index, scale, reg);
-    }
-
     void oneByteOp64(OneByteOpcodeID opcode, const void* address, int reg) {
       m_buffer.ensureSpace(MaxInstructionSize);
       emitRexW(reg, 0, 0);
diff --git a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
@@ -662,21 +662,20 @@ void CodeGeneratorX86Shared::emitUndoALUOperationOOL(LInstruction* ins) {
 }
 
 void CodeGenerator::visitAddI(LAddI* ins) {
-  Register lhs = ToRegister(ins->lhs());
-  const LAllocation* rhs = ins->rhs();
-  Register out = ToRegister(ins->output());
-
-  if (rhs->isConstant()) {
-    if (lhs != out) {
+  if (ins->rhs()->isConstant()) {
+    if (MOZ_UNLIKELY(ins->numDefs() == 1 &&
+                     ins->getDef(0)->policy() !=
+                         LDefinition::MUST_REUSE_INPUT &&
+                     ToRegister(ins->lhs()) != ToRegister(ins->output()))) {
       MOZ_ASSERT(!ins->snapshot());
       // Special case to lower the add to LEA instruction.
-      masm.add32(Imm32(ToInt32(rhs)), lhs, out);
+      masm.add32(Imm32(ToInt32(ins->rhs())), ToRegister(ins->lhs()),
+                 ToRegister(ins->output()));
     } else {
-      masm.addl(Imm32(ToInt32(rhs)), lhs);
+      masm.addl(Imm32(ToInt32(ins->rhs())), ToOperand(ins->lhs()));
     }
   } else {
-    MOZ_ASSERT(out == lhs);
-    masm.addl(ToOperand(rhs), lhs);
+    masm.addl(ToOperand(ins->rhs()), ToRegister(ins->lhs()));
   }
 
   if (ins->snapshot()) {
@@ -692,29 +691,24 @@ void CodeGenerator::visitAddI(LAddI* ins) {
 }
 
 void CodeGenerator::visitAddI64(LAddI64* lir) {
-  Register64 lhs = ToRegister64(lir->lhs());
+  LInt64Allocation lhs = lir->lhs();
   LInt64Allocation rhs = lir->rhs();
 
-  MOZ_ASSERT(ToOutRegister64(lir) == lhs);
+  MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
 
   if (IsConstant(rhs)) {
-    masm.add64(Imm64(ToInt64(rhs)), lhs);
+    masm.add64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
     return;
   }
 
-  masm.add64(ToOperandOrRegister64(rhs), lhs);
+  masm.add64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
 }
 
 void CodeGenerator::visitSubI(LSubI* ins) {
-  Register lhs = ToRegister(ins->lhs());
-  const LAllocation* rhs = ins->rhs();
-
-  MOZ_ASSERT(ToRegister(ins->output()) == lhs);
-
-  if (rhs->isConstant()) {
-    masm.subl(Imm32(ToInt32(rhs)), lhs);
+  if (ins->rhs()->isConstant()) {
+    masm.subl(Imm32(ToInt32(ins->rhs())), ToOperand(ins->lhs()));
   } else {
-    masm.subl(ToOperand(rhs), lhs);
+    masm.subl(ToOperand(ins->rhs()), ToRegister(ins->lhs()));
   }
 
   if (ins->snapshot()) {
@@ -744,10 +738,8 @@ void CodeGenerator::visitSubI64(LSubI64* lir) {
 }
 
 void CodeGenerator::visitMulI(LMulI* ins) {
-  Register lhs = ToRegister(ins->lhs());
+  const LAllocation* lhs = ins->lhs();
   const LAllocation* rhs = ins->rhs();
-  Register out = ToRegister(ins->output());
-
   MMul* mul = ins->mir();
   MOZ_ASSERT_IF(mul->mode() == MMul::Integer,
                 !mul->canBeNegativeZero() && !mul->canOverflow());
@@ -758,78 +750,33 @@ void CodeGenerator::visitMulI(LMulI* ins) {
     if (mul->canBeNegativeZero() && constant <= 0) {
       Assembler::Condition bailoutCond =
           (constant == 0) ? Assembler::Signed : Assembler::Equal;
-      masm.test32(lhs, lhs);
+      masm.test32(ToRegister(lhs), ToRegister(lhs));
       bailoutIf(bailoutCond, ins->snapshot());
     }
 
-    if (!mul->canOverflow()) {
-      switch (constant) {
-        case 2:
-          if (lhs == out) {
-            masm.addl(lhs, lhs);
-          } else {
-            masm.leal(Operand(lhs, lhs, TimesOne), out);
-          }
-          return;
-        case 3:
-          masm.leal(Operand(lhs, lhs, TimesTwo), out);
-          return;
-        case 4:
-          if (lhs == out) {
-            masm.shll(Imm32(2), lhs);
-          } else {
-            masm.leal(Operand(lhs, TimesFour, 0), out);
-          }
-          return;
-        case 5:
-          masm.leal(Operand(lhs, lhs, TimesFour), out);
-          return;
-        case 8:
-          if (lhs == out) {
-            masm.shll(Imm32(3), lhs);
-          } else {
-            masm.leal(Operand(lhs, TimesEight, 0), out);
-          }
-          return;
-        case 9:
-          masm.leal(Operand(lhs, lhs, TimesEight), out);
-          return;
-        default:
-          // Use shift if cannot overflow and constant is power of 2
-          int32_t shift = FloorLog2(constant);
-          if (constant > 0 && (1 << shift) == constant) {
-            if (lhs != out) {
-              masm.movl(lhs, out);
-            }
-            masm.shll(Imm32(shift), out);
-            return;
-          }
-      }
-    }
-
     switch (constant) {
       case -1:
-        if (lhs != out) {
-          masm.movl(lhs, out);
-        }
-        masm.negl(out);
+        masm.negl(ToOperand(lhs));
         break;
       case 0:
-        masm.xorl(out, out);
+        masm.xorl(ToOperand(lhs), ToRegister(lhs));
         return;  // escape overflow check;
       case 1:
-        if (lhs != out) {
-          masm.movl(lhs, out);
-        }
+        // nop
         return;  // escape overflow check;
       case 2:
-        if (lhs == out) {
-          masm.addl(lhs, lhs);
-          break;
-        }
-        [[fallthrough]];
+        masm.addl(ToOperand(lhs), ToRegister(lhs));
+        break;
       default:
-        masm.imull(Imm32(constant), lhs, out);
+        if (!mul->canOverflow() && constant > 0) {
+          // Use shift if cannot overflow and constant is power of 2
+          int32_t shift = FloorLog2(constant);
+          if ((1 << shift) == constant) {
+            masm.shll(Imm32(shift), ToRegister(lhs));
+            return;
+          }
+        }
+        masm.imull(Imm32(ToInt32(rhs)), ToRegister(lhs));
     }
 
     // Bailout on overflow
@@ -837,9 +784,7 @@ void CodeGenerator::visitMulI(LMulI* ins) {
       bailoutIf(Assembler::Overflow, ins->snapshot());
     }
   } else {
-    MOZ_ASSERT(out == lhs);
-
-    masm.imull(ToOperand(rhs), lhs);
+    masm.imull(ToOperand(rhs), ToRegister(lhs));
 
     // Bailout on overflow
     if (mul->canOverflow()) {
@@ -865,13 +810,52 @@ void CodeGenerator::visitMulI(LMulI* ins) {
       });
       addOutOfLineCode(ool, mul);
 
-      masm.test32(lhs, lhs);
+      masm.test32(ToRegister(lhs), ToRegister(lhs));
       masm.j(Assembler::Zero, ool->entry());
       masm.bind(ool->rejoin());
     }
   }
 }
 
+void CodeGenerator::visitMulI64(LMulI64* lir) {
+  LInt64Allocation lhs = lir->lhs();
+  LInt64Allocation rhs = lir->rhs();
+
+  MOZ_ASSERT(ToRegister64(lhs) == ToOutRegister64(lir));
+
+  if (IsConstant(rhs)) {
+    int64_t constant = ToInt64(rhs);
+    switch (constant) {
+      case -1:
+        masm.neg64(ToRegister64(lhs));
+        return;
+      case 0:
+        masm.xor64(ToRegister64(lhs), ToRegister64(lhs));
+        return;
+      case 1:
+        // nop
+        return;
+      case 2:
+        masm.add64(ToRegister64(lhs), ToRegister64(lhs));
+        return;
+      default:
+        if (constant > 0) {
+          // Use shift if constant is power of 2.
+          int32_t shift = mozilla::FloorLog2(constant);
+          if (int64_t(1) << shift == constant) {
+            masm.lshift64(Imm32(shift), ToRegister64(lhs));
+            return;
+          }
+        }
+        Register temp = ToTempRegisterOrInvalid(lir->temp0());
+        masm.mul64(Imm64(constant), ToRegister64(lhs), temp);
+    }
+  } else {
+    Register temp = ToTempRegisterOrInvalid(lir->temp0());
+    masm.mul64(ToOperandOrRegister64(rhs), ToRegister64(lhs), temp);
+  }
+}
+
 void CodeGenerator::visitUDivOrMod(LUDivOrMod* ins) {
   Register lhs = ToRegister(ins->lhs());
   Register rhs = ToRegister(ins->rhs());
@@ -1453,38 +1437,36 @@ void CodeGenerator::visitModI(LModI* ins) {
 }
 
 void CodeGenerator::visitBitNotI(LBitNotI* ins) {
-  Register input = ToRegister(ins->input());
-  MOZ_ASSERT(input == ToRegister(ins->output()));
+  const LAllocation* input = ins->input();
+  MOZ_ASSERT(!input->isConstant());
 
-  masm.notl(input);
+  masm.notl(ToOperand(input));
 }
 
 void CodeGenerator::visitBitOpI(LBitOpI* ins) {
-  Register lhs = ToRegister(ins->lhs());
+  const LAllocation* lhs = ins->lhs();
   const LAllocation* rhs = ins->rhs();
 
-  MOZ_ASSERT(lhs == ToRegister(ins->output()));
-
   switch (ins->bitop()) {
     case JSOp::BitOr:
       if (rhs->isConstant()) {
-        masm.orl(Imm32(ToInt32(rhs)), lhs);
+        masm.orl(Imm32(ToInt32(rhs)), ToOperand(lhs));
       } else {
-        masm.orl(ToOperand(rhs), lhs);
+        masm.orl(ToOperand(rhs), ToRegister(lhs));
       }
       break;
     case JSOp::BitXor:
       if (rhs->isConstant()) {
-        masm.xorl(Imm32(ToInt32(rhs)), lhs);
+        masm.xorl(Imm32(ToInt32(rhs)), ToOperand(lhs));
       } else {
-        masm.xorl(ToOperand(rhs), lhs);
+        masm.xorl(ToOperand(rhs), ToRegister(lhs));
       }
       break;
     case JSOp::BitAnd:
       if (rhs->isConstant()) {
-        masm.andl(Imm32(ToInt32(rhs)), lhs);
+        masm.andl(Imm32(ToInt32(rhs)), ToOperand(lhs));
       } else {
-        masm.andl(ToOperand(rhs), lhs);
+        masm.andl(ToOperand(rhs), ToRegister(lhs));
       }
       break;
     default:
@@ -1493,31 +1475,31 @@ void CodeGenerator::visitBitOpI(LBitOpI* ins) {
 }
 
 void CodeGenerator::visitBitOpI64(LBitOpI64* lir) {
-  Register64 lhs = ToRegister64(lir->lhs());
+  LInt64Allocation lhs = lir->lhs();
   LInt64Allocation rhs = lir->rhs();
 
-  MOZ_ASSERT(ToOutRegister64(lir) == lhs);
+  MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
 
   switch (lir->bitop()) {
     case JSOp::BitOr:
       if (IsConstant(rhs)) {
-        masm.or64(Imm64(ToInt64(rhs)), lhs);
+        masm.or64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
       } else {
-        masm.or64(ToOperandOrRegister64(rhs), lhs);
+        masm.or64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
       }
       break;
     case JSOp::BitXor:
       if (IsConstant(rhs)) {
-        masm.xor64(Imm64(ToInt64(rhs)), lhs);
+        masm.xor64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
       } else {
-        masm.xor64(ToOperandOrRegister64(rhs), lhs);
+        masm.xor64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
       }
       break;
     case JSOp::BitAnd:
       if (IsConstant(rhs)) {
-        masm.and64(Imm64(ToInt64(rhs)), lhs);
+        masm.and64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
       } else {
-        masm.and64(ToOperandOrRegister64(rhs), lhs);
+        masm.and64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
       }
       break;
     default:
@@ -1528,11 +1510,8 @@ void CodeGenerator::visitBitOpI64(LBitOpI64* lir) {
 void CodeGenerator::visitShiftI(LShiftI* ins) {
   Register lhs = ToRegister(ins->lhs());
   const LAllocation* rhs = ins->rhs();
-  Register out = ToRegister(ins->output());
 
   if (rhs->isConstant()) {
-    MOZ_ASSERT(out == lhs);
-
     int32_t shift = ToInt32(rhs) & 0x1F;
     switch (ins->bitop()) {
       case JSOp::Lsh:
@@ -1559,66 +1538,138 @@ void CodeGenerator::visitShiftI(LShiftI* ins) {
     }
   } else {
     Register shift = ToRegister(rhs);
-    MOZ_ASSERT_IF(out != lhs, Assembler::HasBMI2());
+    switch (ins->bitop()) {
+      case JSOp::Lsh:
+        masm.lshift32(shift, lhs);
+        break;
+      case JSOp::Rsh:
+        masm.rshift32Arithmetic(shift, lhs);
+        break;
+      case JSOp::Ursh:
+        masm.rshift32(shift, lhs);
+        if (ins->mir()->toUrsh()->fallible()) {
+          // x >>> 0 can overflow.
+          masm.test32(lhs, lhs);
+          bailoutIf(Assembler::Signed, ins->snapshot());
+        }
+        break;
+      default:
+        MOZ_CRASH("Unexpected shift op");
+    }
+  }
+}
+
+void CodeGenerator::visitShiftIntPtr(LShiftIntPtr* ins) {
+  Register lhs = ToRegister(ins->lhs());
+  const LAllocation* rhs = ins->rhs();
 
+  if (rhs->isConstant()) {
+    constexpr intptr_t mask = (sizeof(intptr_t) * CHAR_BIT) - 1;
+    int32_t shift = ToIntPtr(rhs) & mask;
     switch (ins->bitop()) {
       case JSOp::Lsh:
-        if (out != lhs) {
-          masm.shlxl(lhs, shift, out);
-        } else {
-          masm.lshift32(shift, lhs);
+        if (shift) {
+          masm.lshiftPtr(Imm32(shift), lhs);
         }
         break;
       case JSOp::Rsh:
-        if (out != lhs) {
-          masm.sarxl(lhs, shift, out);
-        } else {
-          masm.rshift32Arithmetic(shift, lhs);
+        if (shift) {
+          masm.rshiftPtrArithmetic(Imm32(shift), lhs);
         }
         break;
       case JSOp::Ursh:
-        if (out != lhs) {
-          masm.shrxl(lhs, shift, out);
-        } else {
-          masm.rshift32(shift, lhs);
+        if (shift) {
+          masm.rshiftPtr(Imm32(shift), lhs);
         }
-        if (ins->mir()->toUrsh()->fallible()) {
-          // x >>> 0 can overflow.
-          masm.test32(out, out);
-          bailoutIf(Assembler::Signed, ins->snapshot());
+        break;
+      default:
+        MOZ_CRASH("Unexpected shift op");
+    }
+  } else {
+    Register shift = ToRegister(rhs);
+    switch (ins->bitop()) {
+      case JSOp::Lsh:
+        masm.lshiftPtr(shift, lhs);
+        break;
+      case JSOp::Rsh:
+        masm.rshiftPtrArithmetic(shift, lhs);
+        break;
+      case JSOp::Ursh:
+        masm.rshiftPtr(shift, lhs);
+        break;
+      default:
+        MOZ_CRASH("Unexpected shift op");
+    }
+  }
+}
+
+void CodeGenerator::visitShiftI64(LShiftI64* lir) {
+  LInt64Allocation lhs = lir->lhs();
+  const LAllocation* rhs = lir->rhs();
+
+  MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
+
+  if (rhs->isConstant()) {
+    int32_t shift = int32_t(rhs->toConstant()->toInt64() & 0x3F);
+    switch (lir->bitop()) {
+      case JSOp::Lsh:
+        if (shift) {
+          masm.lshift64(Imm32(shift), ToRegister64(lhs));
+        }
+        break;
+      case JSOp::Rsh:
+        if (shift) {
+          masm.rshift64Arithmetic(Imm32(shift), ToRegister64(lhs));
+        }
+        break;
+      case JSOp::Ursh:
+        if (shift) {
+          masm.rshift64(Imm32(shift), ToRegister64(lhs));
         }
         break;
       default:
         MOZ_CRASH("Unexpected shift op");
     }
+    return;
+  }
+
+  Register shift = ToRegister(rhs);
+#ifdef JS_CODEGEN_X86
+  MOZ_ASSERT(shift == ecx);
+#endif
+  switch (lir->bitop()) {
+    case JSOp::Lsh:
+      masm.lshift64(shift, ToRegister64(lhs));
+      break;
+    case JSOp::Rsh:
+      masm.rshift64Arithmetic(shift, ToRegister64(lhs));
+      break;
+    case JSOp::Ursh:
+      masm.rshift64(shift, ToRegister64(lhs));
+      break;
+    default:
+      MOZ_CRASH("Unexpected shift op");
   }
 }
 
 void CodeGenerator::visitUrshD(LUrshD* ins) {
   Register lhs = ToRegister(ins->lhs());
+  MOZ_ASSERT(ToRegister(ins->temp0()) == lhs);
+
   const LAllocation* rhs = ins->rhs();
   FloatRegister out = ToFloatRegister(ins->output());
-  Register temp = ToRegister(ins->temp0());
 
   if (rhs->isConstant()) {
-    MOZ_ASSERT(temp == lhs);
-
     int32_t shift = ToInt32(rhs) & 0x1F;
     if (shift) {
       masm.shrl(Imm32(shift), lhs);
     }
   } else {
-    MOZ_ASSERT_IF(temp != lhs, Assembler::HasBMI2());
-
     Register shift = ToRegister(rhs);
-    if (temp != lhs) {
-      masm.shrxl(lhs, shift, temp);
-    } else {
-      masm.rshift32(shift, lhs);
-    }
+    masm.rshift32(shift, lhs);
   }
 
-  masm.convertUInt32ToDouble(temp, out);
+  masm.convertUInt32ToDouble(lhs, out);
 }
 
 Operand CodeGeneratorX86Shared::ToOperand(const LAllocation& a) {
@@ -1635,6 +1686,10 @@ Operand CodeGeneratorX86Shared::ToOperand(const LAllocation* a) {
   return ToOperand(*a);
 }
 
+Operand CodeGeneratorX86Shared::ToOperand(const LDefinition* def) {
+  return ToOperand(def->output());
+}
+
 MoveOperand CodeGeneratorX86Shared::toMoveOperand(LAllocation a) const {
   if (a.isGeneralReg()) {
     return MoveOperand(ToRegister(a));
@@ -1813,24 +1868,16 @@ void CodeGenerator::visitNegI64(LNegI64* ins) {
 
 void CodeGenerator::visitNegD(LNegD* ins) {
   FloatRegister input = ToFloatRegister(ins->input());
-  FloatRegister output = ToFloatRegister(ins->output());
+  MOZ_ASSERT(input == ToFloatRegister(ins->output()));
 
-  ScratchDoubleScope scratch(masm);
-  masm.loadConstantDouble(-0.0, scratch);
-
-  // XOR the float in a float register with -0.0.
-  masm.vxorpd(scratch, input, output);  // s ^ 0x80000000000000
+  masm.negateDouble(input);
 }
 
 void CodeGenerator::visitNegF(LNegF* ins) {
   FloatRegister input = ToFloatRegister(ins->input());
-  FloatRegister output = ToFloatRegister(ins->output());
-
-  ScratchDoubleScope scratch(masm);
-  masm.loadConstantFloat32(-0.0f, scratch);
+  MOZ_ASSERT(input == ToFloatRegister(ins->output()));
 
-  // XOR the float in a float register with -0.0.
-  masm.vxorpd(scratch, input, output);  // s ^ 0x80000000000000
+  masm.negateFloat(input);
 }
 
 void CodeGenerator::visitCompareExchangeTypedArrayElement(
diff --git a/js/src/jit/x86-shared/CodeGenerator-x86-shared.h b/js/src/jit/x86-shared/CodeGenerator-x86-shared.h
@@ -35,6 +35,7 @@ class CodeGeneratorX86Shared : public CodeGeneratorShared {
 
   Operand ToOperand(const LAllocation& a);
   Operand ToOperand(const LAllocation* a);
+  Operand ToOperand(const LDefinition* def);
 
 #ifdef JS_PUNBOX64
   Operand ToOperandOrRegister64(const LInt64Allocation& input);
diff --git a/js/src/jit/x86-shared/Lowering-x86-shared.cpp b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
@@ -68,25 +68,64 @@ void LIRGeneratorX86Shared::lowerForShift(LInstructionHelper<1, 2, 0>* ins,
 
   if (rhs->isConstant()) {
     ins->setOperand(1, useOrConstantAtStart(rhs));
-    defineReuseInput(ins, mir, 0);
   } else if (!mir->isRotate()) {
-    if (Assembler::HasBMI2()) {
-      ins->setOperand(1, useRegisterAtStart(rhs));
-      define(ins, mir);
-    } else {
-      ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
-                             ? useShiftRegister(rhs)
-                             : useShiftRegisterAtStart(rhs));
-      defineReuseInput(ins, mir, 0);
-    }
+    ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
+                           ? useShiftRegister(rhs)
+                           : useShiftRegisterAtStart(rhs));
   } else {
     ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
                            ? useFixed(rhs, ecx)
                            : useFixedAtStart(rhs, ecx));
-    defineReuseInput(ins, mir, 0);
   }
+
+  defineReuseInput(ins, mir, 0);
 }
 
+template <class LInstr>
+void LIRGeneratorX86Shared::lowerForShiftInt64(LInstr* ins, MDefinition* mir,
+                                               MDefinition* lhs,
+                                               MDefinition* rhs) {
+  LAllocation rhsAlloc;
+  if (rhs->isConstant()) {
+    rhsAlloc = useOrConstantAtStart(rhs);
+#ifdef JS_CODEGEN_X64
+  } else if (std::is_same_v<LInstr, LShiftI64>) {
+    rhsAlloc = useShiftRegister(rhs);
+  } else {
+    rhsAlloc = useFixed(rhs, rcx);
+  }
+#else
+  } else {
+    // The operands are int64, but we only care about the lower 32 bits of
+    // the RHS. On 32-bit, the code below will load that part in ecx and
+    // will discard the upper half.
+    rhsAlloc = useLowWordFixed(rhs, ecx);
+  }
+#endif
+
+  if constexpr (std::is_same_v<LInstr, LShiftI64>) {
+    ins->setLhs(useInt64RegisterAtStart(lhs));
+    ins->setRhs(rhsAlloc);
+    defineInt64ReuseInput(ins, mir, LShiftI64::LhsIndex);
+  } else {
+    ins->setInput(useInt64RegisterAtStart(lhs));
+    ins->setCount(rhsAlloc);
+#if defined(JS_NUNBOX32)
+    ins->setTemp0(temp());
+#endif
+    defineInt64ReuseInput(ins, mir, LRotateI64::InputIndex);
+  }
+}
+
+template void LIRGeneratorX86Shared::lowerForShiftInt64(LShiftI64* ins,
+                                                        MDefinition* mir,
+                                                        MDefinition* lhs,
+                                                        MDefinition* rhs);
+template void LIRGeneratorX86Shared::lowerForShiftInt64(LRotateI64* ins,
+                                                        MDefinition* mir,
+                                                        MDefinition* lhs,
+                                                        MDefinition* rhs);
+
 void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 1, 0>* ins,
                                         MDefinition* mir, MDefinition* input) {
   ins->setOperand(0, useRegisterAtStart(input));
@@ -96,35 +135,26 @@ void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 1, 0>* ins,
 void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins,
                                         MDefinition* mir, MDefinition* lhs,
                                         MDefinition* rhs) {
+  ins->setOperand(0, useRegisterAtStart(lhs));
+  ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
+                         ? useOrConstant(rhs)
+                         : useOrConstantAtStart(rhs));
   if (MOZ_UNLIKELY(mir->isAdd() && mir->type() == MIRType::Int32 &&
-                   rhs->isConstant() && !mir->toAdd()->fallible())) {
+                   mir->getOperand(1)->isConstant() &&
+                   !mir->toAdd()->fallible())) {
     // Special case instruction that is widely used in Wasm during address
     // calculation. And x86 platform has LEA instruction for it.
     // See CodeGenerator::visitAddI for codegen.
-    ins->setOperand(0, useRegisterAtStart(lhs));
-    ins->setOperand(1, useOrConstantAtStart(rhs));
     define(ins, mir);
     return;
   }
-
-  ins->setOperand(0, useRegisterAtStart(lhs));
-  ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
-                         ? useOrConstant(rhs)
-                         : useOrConstantAtStart(rhs));
   defineReuseInput(ins, mir, 0);
 }
 
 void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 1, 0>* ins,
                                         MDefinition* mir, MDefinition* input) {
-  // Without AVX, we'll need to use the x86 encodings where the input must be
-  // the same location as the output.
-  if (!Assembler::HasAVX()) {
-    ins->setOperand(0, useRegisterAtStart(input));
-    defineReuseInput(ins, mir, 0);
-  } else {
-    ins->setOperand(0, useRegisterAtStart(input));
-    define(ins, mir);
-  }
+  ins->setOperand(0, useRegisterAtStart(input));
+  defineReuseInput(ins, mir, 0);
 }
 
 void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 0>* ins,
@@ -146,21 +176,12 @@ void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 0>* ins,
 
 void LIRGeneratorX86Shared::lowerMulI(MMul* mul, MDefinition* lhs,
                                       MDefinition* rhs) {
-  if (rhs->isConstant()) {
-    auto* lir = new (alloc()) LMulI(useRegisterAtStart(lhs),
-                                    useOrConstantAtStart(rhs), LAllocation());
-    if (mul->fallible()) {
-      assignSnapshot(lir, mul->bailoutKind());
-    }
-    define(lir, mul);
-    return;
-  }
-
   // Note: If we need a negative zero check, lhs is used twice.
   LAllocation lhsCopy = mul->canBeNegativeZero() ? use(lhs) : LAllocation();
   LMulI* lir = new (alloc())
       LMulI(useRegisterAtStart(lhs),
-            willHaveDifferentLIRNodes(lhs, rhs) ? use(rhs) : useAtStart(rhs),
+            willHaveDifferentLIRNodes(lhs, rhs) ? useOrConstant(rhs)
+                                                : useOrConstantAtStart(rhs),
             lhsCopy);
   if (mul->fallible()) {
     assignSnapshot(lir, mul->bailoutKind());
@@ -433,21 +454,19 @@ void LIRGeneratorX86Shared::lowerUrshD(MUrsh* mir) {
   MOZ_ASSERT(rhs->type() == MIRType::Int32);
   MOZ_ASSERT(mir->type() == MIRType::Double);
 
+#ifdef JS_CODEGEN_X64
+  static_assert(ecx == rcx);
+#endif
+
   LUse lhsUse = useRegisterAtStart(lhs);
   LAllocation rhsAlloc;
-  LDefinition tempDef;
   if (rhs->isConstant()) {
     rhsAlloc = useOrConstant(rhs);
-    tempDef = tempCopy(lhs, 0);
-  } else if (Assembler::HasBMI2()) {
-    rhsAlloc = useRegisterAtStart(rhs);
-    tempDef = temp();
   } else {
     rhsAlloc = useShiftRegister(rhs);
-    tempDef = tempCopy(lhs, 0);
   }
 
-  auto* lir = new (alloc()) LUrshD(lhsUse, rhsAlloc, tempDef);
+  LUrshD* lir = new (alloc()) LUrshD(lhsUse, rhsAlloc, tempCopy(lhs, 0));
   define(lir, mir);
 }
 
diff --git a/js/src/jit/x86-shared/Lowering-x86-shared.h b/js/src/jit/x86-shared/Lowering-x86-shared.h
@@ -32,6 +32,10 @@ class LIRGeneratorX86Shared : public LIRGeneratorShared {
   void lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                    MDefinition* lhs, MDefinition* rhs);
 
+  template <class LInstr>
+  void lowerForShiftInt64(LInstr* ins, MDefinition* mir, MDefinition* lhs,
+                          MDefinition* rhs);
+
   void lowerForFPU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
                    MDefinition* input);
   void lowerForFPU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h
@@ -293,15 +293,18 @@ void MacroAssembler::neg32(Register reg) { negl(reg); }
 
 void MacroAssembler::negateFloat(FloatRegister reg) {
   ScratchFloat32Scope scratch(*this);
-  loadConstantFloat32(-0.0f, scratch);
+  vpcmpeqw(Operand(scratch), scratch, scratch);
+  vpsllq(Imm32(31), scratch, scratch);
 
   // XOR the float in a float register with -0.0.
   vxorps(scratch, reg, reg);  // s ^ 0x80000000
 }
 
 void MacroAssembler::negateDouble(FloatRegister reg) {
+  // From MacroAssemblerX86Shared::maybeInlineDouble
   ScratchDoubleScope scratch(*this);
-  loadConstantDouble(-0.0, scratch);
+  vpcmpeqw(Operand(scratch), scratch, scratch);
+  vpsllq(Imm32(63), scratch, scratch);
 
   // XOR the float in a float register with -0.0.
   vxorpd(scratch, reg, reg);  // s ^ 0x80000000000000
diff --git a/js/src/jit/x86/CodeGenerator-x86.cpp b/js/src/jit/x86/CodeGenerator-x86.cpp
@@ -813,45 +813,6 @@ void CodeGeneratorX86::visitOutOfLineTruncateFloat32(
   masm.jump(ool->rejoin());
 }
 
-void CodeGenerator::visitMulI64(LMulI64* lir) {
-  Register64 lhs = ToRegister64(lir->lhs());
-  LInt64Allocation rhs = lir->rhs();
-
-  MOZ_ASSERT(ToOutRegister64(lir) == lhs);
-
-  if (IsConstant(rhs)) {
-    int64_t constant = ToInt64(rhs);
-    switch (constant) {
-      case -1:
-        masm.neg64(lhs);
-        return;
-      case 0:
-        masm.xor64(lhs, lhs);
-        return;
-      case 1:
-        // nop
-        return;
-      case 2:
-        masm.add64(lhs, lhs);
-        return;
-      default:
-        if (constant > 0) {
-          // Use shift if constant is power of 2.
-          int32_t shift = mozilla::FloorLog2(constant);
-          if (int64_t(1) << shift == constant) {
-            masm.lshift64(Imm32(shift), lhs);
-            return;
-          }
-        }
-        Register temp = ToTempRegisterOrInvalid(lir->temp0());
-        masm.mul64(Imm64(constant), lhs, temp);
-    }
-  } else {
-    Register temp = ToTempRegisterOrInvalid(lir->temp0());
-    masm.mul64(ToOperandOrRegister64(rhs), lhs, temp);
-  }
-}
-
 void CodeGenerator::visitDivOrModI64(LDivOrModI64* lir) {
   MOZ_ASSERT(gen->compilingWasm());
   MOZ_ASSERT(ToRegister(lir->instance()) == InstanceReg);
@@ -996,113 +957,6 @@ void CodeGeneratorX86::emitBigIntPtrMod(LBigIntPtrMod* ins, Register dividend,
   masm.idiv(divisor);
 }
 
-void CodeGenerator::visitShiftIntPtr(LShiftIntPtr* ins) {
-  Register lhs = ToRegister(ins->lhs());
-  const LAllocation* rhs = ins->rhs();
-  Register out = ToRegister(ins->output());
-
-  if (rhs->isConstant()) {
-    MOZ_ASSERT(out == lhs);
-
-    int32_t shift = ToIntPtr(rhs) & 0x1F;
-    switch (ins->bitop()) {
-      case JSOp::Lsh:
-        if (shift) {
-          masm.lshiftPtr(Imm32(shift), lhs);
-        }
-        break;
-      case JSOp::Rsh:
-        if (shift) {
-          masm.rshiftPtrArithmetic(Imm32(shift), lhs);
-        }
-        break;
-      case JSOp::Ursh:
-        if (shift) {
-          masm.rshiftPtr(Imm32(shift), lhs);
-        }
-        break;
-      default:
-        MOZ_CRASH("Unexpected shift op");
-    }
-  } else {
-    Register shift = ToRegister(rhs);
-    MOZ_ASSERT_IF(out != lhs, Assembler::HasBMI2());
-
-    switch (ins->bitop()) {
-      case JSOp::Lsh:
-        if (out != lhs) {
-          masm.shlxl(lhs, shift, out);
-        } else {
-          masm.lshiftPtr(shift, lhs);
-        }
-        break;
-      case JSOp::Rsh:
-        if (out != lhs) {
-          masm.sarxl(lhs, shift, out);
-        } else {
-          masm.rshiftPtrArithmetic(shift, lhs);
-        }
-        break;
-      case JSOp::Ursh:
-        if (out != lhs) {
-          masm.shrxl(lhs, shift, out);
-        } else {
-          masm.rshiftPtr(shift, lhs);
-        }
-        break;
-      default:
-        MOZ_CRASH("Unexpected shift op");
-    }
-  }
-}
-
-void CodeGenerator::visitShiftI64(LShiftI64* lir) {
-  Register64 lhs = ToRegister64(lir->lhs());
-  const LAllocation* rhs = lir->rhs();
-
-  MOZ_ASSERT(ToOutRegister64(lir) == lhs);
-
-  if (rhs->isConstant()) {
-    int32_t shift = int32_t(rhs->toConstant()->toInt64() & 0x3F);
-    switch (lir->bitop()) {
-      case JSOp::Lsh:
-        if (shift) {
-          masm.lshift64(Imm32(shift), lhs);
-        }
-        break;
-      case JSOp::Rsh:
-        if (shift) {
-          masm.rshift64Arithmetic(Imm32(shift), lhs);
-        }
-        break;
-      case JSOp::Ursh:
-        if (shift) {
-          masm.rshift64(Imm32(shift), lhs);
-        }
-        break;
-      default:
-        MOZ_CRASH("Unexpected shift op");
-    }
-    return;
-  }
-
-  Register shift = ToRegister(rhs);
-  MOZ_ASSERT(shift == ecx);
-  switch (lir->bitop()) {
-    case JSOp::Lsh:
-      masm.lshift64(shift, lhs);
-      break;
-    case JSOp::Rsh:
-      masm.rshift64Arithmetic(shift, lhs);
-      break;
-    case JSOp::Ursh:
-      masm.rshift64(shift, lhs);
-      break;
-    default:
-      MOZ_CRASH("Unexpected shift op");
-  }
-}
-
 void CodeGenerator::visitWasmSelectI64(LWasmSelectI64* lir) {
   MOZ_ASSERT(lir->mir()->type() == MIRType::Int64);
 
diff --git a/js/src/jit/x86/Lowering-x86.cpp b/js/src/jit/x86/Lowering-x86.cpp
@@ -226,40 +226,6 @@ void LIRGeneratorX86::lowerForMulInt64(LMulI64* ins, MMul* mir,
                                     LAllocation(AnyRegister(eax))));
 }
 
-template <class LInstr>
-void LIRGeneratorX86::lowerForShiftInt64(LInstr* ins, MDefinition* mir,
-                                         MDefinition* lhs, MDefinition* rhs) {
-  LAllocation rhsAlloc;
-  if (rhs->isConstant()) {
-    rhsAlloc = useOrConstantAtStart(rhs);
-  } else {
-    // The operands are int64, but we only care about the lower 32 bits of the
-    // RHS. The code below will load that part in ecx and will discard the upper
-    // half.
-    rhsAlloc = useLowWordFixed(rhs, ecx);
-  }
-
-  if constexpr (std::is_same_v<LInstr, LShiftI64>) {
-    ins->setLhs(useInt64RegisterAtStart(lhs));
-    ins->setRhs(rhsAlloc);
-    defineInt64ReuseInput(ins, mir, LShiftI64::LhsIndex);
-  } else {
-    ins->setInput(useInt64RegisterAtStart(lhs));
-    ins->setCount(rhsAlloc);
-    ins->setTemp0(temp());
-    defineInt64ReuseInput(ins, mir, LRotateI64::InputIndex);
-  }
-}
-
-template void LIRGeneratorX86::lowerForShiftInt64(LShiftI64* ins,
-                                                  MDefinition* mir,
-                                                  MDefinition* lhs,
-                                                  MDefinition* rhs);
-template void LIRGeneratorX86::lowerForShiftInt64(LRotateI64* ins,
-                                                  MDefinition* mir,
-                                                  MDefinition* lhs,
-                                                  MDefinition* rhs);
-
 void LIRGenerator::visitCompareExchangeTypedArrayElement(
     MCompareExchangeTypedArrayElement* ins) {
   MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
diff --git a/js/src/jit/x86/Lowering-x86.h b/js/src/jit/x86/Lowering-x86.h
@@ -50,10 +50,6 @@ class LIRGeneratorX86 : public LIRGeneratorX86Shared {
   void lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs,
                         MDefinition* rhs);
 
-  template <class LInstr>
-  void lowerForShiftInt64(LInstr* ins, MDefinition* mir, MDefinition* lhs,
-                          MDefinition* rhs);
-
   void lowerTruncateDToInt32(MTruncateToInt32* ins);
   void lowerTruncateFToInt32(MTruncateToInt32* ins);
   void lowerBuiltinInt64ToFloatingPoint(MBuiltinInt64ToFloatingPoint* ins);

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log | Files | Refs | README | LICENSE

M	js/src/jit-test/tests/wasm/binop-x64-ion-codegen.js	|	58	+++++++++++-----------------------------------------------
M	js/src/jit-test/tests/wasm/ion-adhoc-multiplatform.js	|	69	++++++++++++++++++++++++++++++++++++++++++++++++---------------------
M	js/src/jit/LIROps.yaml	|	2	+-
M	js/src/jit/x64/Assembler-x64.h	|	3	---
M	js/src/jit/x64/BaseAssembler-x64.h	|	6	------
M	js/src/jit/x64/CodeGenerator-x64.cpp	|	196	-------------------------------------------------------------------------------
M	js/src/jit/x64/Lowering-x64.cpp	|	57	++-------------------------------------------------------
M	js/src/jit/x64/Lowering-x64.h	|	4	----
M	js/src/jit/x86-shared/BaseAssembler-x86-shared.h	|	8	--------
M	js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp	|	351	+++++++++++++++++++++++++++++++++++++++++++++----------------------------------
M	js/src/jit/x86-shared/CodeGenerator-x86-shared.h	|	1	+
M	js/src/jit/x86-shared/Lowering-x86-shared.cpp	|	111	++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
M	js/src/jit/x86-shared/Lowering-x86-shared.h	|	4	++++
M	js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h	|	7	+++++--
M	js/src/jit/x86/CodeGenerator-x86.cpp	|	146	-------------------------------------------------------------------------------
M	js/src/jit/x86/Lowering-x86.cpp	|	34	----------------------------------
M	js/src/jit/x86/Lowering-x86.h	|	4	----