tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit ae14d1de4d426ad886ccccfa637780f7393b1936
parent 1322860b6d3a6a07156e32f59e97fb38782d53b5
Author: Alexandru Marc <amarc@mozilla.com>
Date:   Mon, 27 Oct 2025 12:34:31 +0200

Revert (Bug 1996346, Bug 1996345, Bug 1996344, Bug 1996343) for causing SpiderMonkey (SM) bustages in binop-x64-ion-codegen.js

This reverts commit b48243d060f0e0bd84e66d795ddcec182214b3dc.

Revert "Bug 1996346 - Part 3: Split lowering and codegen for LShiftI64. r=spidermonkey-reviewers,iain"

This reverts commit 1949d39c47bf940312a88072d90d0e49775d21e7.

Revert "Bug 1996346 - Part 2: Relax register constraints for UrshD on x86-shared. r=spidermonkey-reviewers,iain"

This reverts commit f1ea8fcc82f116506a7b3d2edd8329ef91985294.

Revert "Bug 1996346 - Part 1: Don't reuse input for shift instructions when BMI2 is available. r=spidermonkey-reviewers,iain"

This reverts commit ef2f301bc14372d2a2072dd546d3a63f38babad5.

Revert "Bug 1996345 - Part 4: Use three operand imul instruction for Int64 when possible. r=spidermonkey-reviewers,iain"

This reverts commit 130494460295b72c62f2d7c6763cb00c89189a85.

Revert "Bug 1996345 - Part 3: Support MEM_SCALE_NOBASE for 64-bit lea. r=spidermonkey-reviewers,iain"

This reverts commit 979b5c3d12aa2503aabdff77ec4c8f71ed3b9d37.

Revert "Bug 1996345 - Part 2: Split codegen for CodeGenerator::visitMulI64. r=spidermonkey-reviewers,iain"

This reverts commit e5cbfb4bc69ecd1d72e44df4601b32601c5fc9bb.

Revert "Bug 1996345 - Part 1: Use three operand imul instruction when possible. r=spidermonkey-reviewers,iain"

This reverts commit 86cef46aa081c087e3129bb5e012311b9494500d.

Revert "Bug 1996344 - Part 3: Remove unused ToOperand method. r=spidermonkey-reviewers,iain"

This reverts commit 7b29457f213ccab25b5e2ea05bae3210936f0c57.

Revert "Bug 1996344 - Part 2: Clean-up ALU operations codegen for x86-shared. r=spidermonkey-reviewers,iain"

This reverts commit 1f3652d44b7e529155e4a967b88b556dd4b76375.

Revert "Bug 1996344 - Part 1: Support lea instruction when constant is left-hand side operand. r=spidermonkey-reviewers,iain"

This reverts commit ab22f181a2f5f330406c5451c46d5fbfea842651.

Revert "Bug 1996343: Use AVX encoding for unary lowerForFPU. r=spidermonkey-reviewers,iain"

This reverts commit 11b4c69ef331696376eaecba6b00b3625d13d4a5.

Diffstat:
M js/src/jit-test/tests/wasm/binop-x64-ion-codegen.js | 58 +++++++++++-----------------------------------------------
M js/src/jit-test/tests/wasm/ion-adhoc-multiplatform.js | 69 ++++++++++++++++++++++++++++++++++++++++++++++++---------------------
M js/src/jit/LIROps.yaml | 2 +-
M js/src/jit/x64/Assembler-x64.h | 3 ---
M js/src/jit/x64/BaseAssembler-x64.h | 6 ------
M js/src/jit/x64/CodeGenerator-x64.cpp | 196 -------------------------------------------------------------------------------
M js/src/jit/x64/Lowering-x64.cpp | 57 ++-------------------------------------------------------
M js/src/jit/x64/Lowering-x64.h | 4 ----
M js/src/jit/x86-shared/BaseAssembler-x86-shared.h | 8 --------
M js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp | 351 +++++++++++++++++++++++++++++++++++++++++++++----------------------------------
M js/src/jit/x86-shared/CodeGenerator-x86-shared.h | 1 +
M js/src/jit/x86-shared/Lowering-x86-shared.cpp | 111 ++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
M js/src/jit/x86-shared/Lowering-x86-shared.h | 4 ++++
M js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h | 7 +++++--
M js/src/jit/x86/CodeGenerator-x86.cpp | 146 -------------------------------------------------------------------------------
M js/src/jit/x86/Lowering-x86.cpp | 34 ----------------------------------
M js/src/jit/x86/Lowering-x86.h | 4 ----
17 files changed, 336 insertions(+), 725 deletions(-)

diff --git a/js/src/jit-test/tests/wasm/binop-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/binop-x64-ion-codegen.js @@ -37,7 +37,7 @@ let zero32 = codegenTestX64_adhoc( zero32, 'f', - 'xor %eax, %eax'); + 'xor %eax, %eax', {no_prefix:true}); assertEq(wasmEvalText(zero32).exports.f(-37), 0) assertEq(wasmEvalText(zero32).exports.f(42), 0) @@ -47,7 +47,7 @@ let zero64 = `(module codegenTestX64_adhoc( zero64, 'f', - 'xor %rax, %rax'); + 'xor %rax, %rax', {no_prefix:true}); assertEq(wasmEvalText(zero64).exports.f(-37000000000n), 0n) assertEq(wasmEvalText(zero64).exports.f(42000000000n), 0n) @@ -74,7 +74,7 @@ codegenTestX64_adhoc( assertEq(wasmEvalText(one64).exports.f(-37000000000n), -37000000000n) assertEq(wasmEvalText(one64).exports.f(42000000000n), 42000000000n) -// Test that multiplication by two yields lea +// Test that multiplication by two yields an add let double32 = `(module @@ -83,7 +83,7 @@ let double32 = codegenTestX64_adhoc( double32, 'f', - 'lea \\(%rdi,%rdi,1\\), %eax'); + 'add %eax, %eax', {no_prefix:true}); assertEq(wasmEvalText(double32).exports.f(-37), -74) assertEq(wasmEvalText(double32).exports.f(42), 84) @@ -93,11 +93,11 @@ let double64 = `(module codegenTestX64_adhoc( double64, 'f', - 'lea \\(%rdi,%rdi,1\\), %rax'); + 'add %rax, %rax', {no_prefix:true}); assertEq(wasmEvalText(double64).exports.f(-37000000000n), -74000000000n) assertEq(wasmEvalText(double64).exports.f(42000000000n), 84000000000n) -// Test that multiplication by four yields lea +// Test that multiplication by four yields a shift let quad32 = `(module @@ -106,7 +106,7 @@ let quad32 = codegenTestX64_adhoc( quad32, 'f', - 'lea \\(,%rdi,4\\), %eax'); + 'shl \\$0x02, %eax', {no_prefix:true}); assertEq(wasmEvalText(quad32).exports.f(-37), -148) assertEq(wasmEvalText(quad32).exports.f(42), 168) @@ -116,11 +116,11 @@ let quad64 = `(module codegenTestX64_adhoc( quad64, 'f', - 'lea \\(,%rdi,4\\), %rax'); + 'shl \\$0x02, %rax', {no_prefix:true}); 
assertEq(wasmEvalText(quad64).exports.f(-37000000000n), -148000000000n) assertEq(wasmEvalText(quad64).exports.f(42000000000n), 168000000000n) -// Test that multiplication by five yields lea +// Test that multiplication by five yields a multiply let quint32 = `(module @@ -129,7 +129,7 @@ let quint32 = codegenTestX64_adhoc( quint32, 'f', - 'lea \\(%rdi,%rdi,4\\), %eax'); + 'imul \\$0x05, %eax, %eax', {no_prefix:true}); assertEq(wasmEvalText(quint32).exports.f(-37), -37*5) assertEq(wasmEvalText(quint32).exports.f(42), 42*5) @@ -139,46 +139,10 @@ let quint64 = `(module codegenTestX64_adhoc( quint64, 'f', - `lea \\(%rdi,%rdi,4\\), %rax`) + `imul \\$0x05, %rax, %rax`, {no_prefix:true}) assertEq(wasmEvalText(quint64).exports.f(-37000000000n), -37000000000n*5n) assertEq(wasmEvalText(quint64).exports.f(42000000000n), 42000000000n*5n) -// Test that multiplication by six yields imul - -let sext32 = - `(module - (func (export "f") (param i32) (result i32) - (i32.mul (local.get 0) (i32.const 6))))`; -codegenTestX64_adhoc( - sext32, - 'f', - 'imul \\$0x06, %edi, %eax'); -assertEq(wasmEvalText(sext32).exports.f(-37), -37*6) -assertEq(wasmEvalText(sext32).exports.f(42), 42*6) - -let sext64 = `(module - (func (export "f") (param i64) (result i64) - (i64.mul (local.get 0) (i64.const 6))))` -codegenTestX64_adhoc( - sext64, - 'f', - `imul \\$0x06, %rdi, %rax`) -assertEq(wasmEvalText(sext64).exports.f(-37000000000n), -37000000000n*6n) -assertEq(wasmEvalText(sext64).exports.f(42000000000n), 42000000000n*6n) - -// Test that multiplication by UINT32_MAX yields imul - -let uint32max64 = `(module - (func (export "f") (param i64) (result i64) - (i64.mul (local.get 0) (i64.const 0xffffffff))))` -codegenTestX64_adhoc( - uint32max64, - 'f', - `mov \\$-0x01, %r11d - imul %r11, %rax`, {no_prefix:true}) -assertEq(wasmEvalText(uint32max64).exports.f(-37000000000n), BigInt.asIntN(64, -37000000000n*0xffffffffn)) -assertEq(wasmEvalText(uint32max64).exports.f(42000000000n), BigInt.asIntN(64, 
42000000000n*0xffffffffn)) - // Test that 0-n yields negation. let subneg32 = diff --git a/js/src/jit-test/tests/wasm/ion-adhoc-multiplatform.js b/js/src/jit-test/tests/wasm/ion-adhoc-multiplatform.js @@ -30,7 +30,18 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul32_zeroL") (param $p1 i32) (result i32) (i32.mul (i32.const 0) (local.get $p1))))`, "mul32_zeroL", - {x64: `xor %eax, %eax`, + {x64: // FIXME move folding to MIR level + // First we move edi to eax unnecessarily via ecx (bug 1752520), + // then we overwrite eax. Presumably because the folding + // 0 * x => 0 is done at the LIR level, not the MIR level, hence + // the now-pointless WasmParameter node is not DCE'd away, since + // DCE only happens at the MIR level. In fact all targets suffer + // from the latter problem, but on x86 no_prefix_x86:true + // hides it, and on arm32/64 the pointless move is correctly + // transformed by RA into a no-op. + `mov %edi, %ecx + mov %ecx, %eax + xor %eax, %eax`, x86: `xor %eax, %eax`, arm64: `mov w0, wzr`, arm: `mov r0, #0`}, @@ -40,8 +51,11 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul64_zeroL") (param $p1 i64) (result i64) (i64.mul (i64.const 0) (local.get $p1))))`, "mul64_zeroL", - // FIXME zero-creation insns could be improved - {x64: `xor %rax, %rax`, // REX.W is redundant + // FIXME folding happened, zero-creation insns could be improved + {x64: // Same shenanigans as above. Also, on xor, REX.W is redundant. + `mov %rdi, %rcx + mov %rcx, %rax + xor %rax, %rax`, x86: `xor %eax, %eax xor %edx, %edx`, arm64: `mov x0, xzr`, @@ -54,14 +68,7 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul32_oneL") (param $p1 i32) (result i32) (i32.mul (i32.const 1) (local.get $p1))))`, "mul32_oneL", - {x64: // We move edi to eax unnecessarily via ecx (bug 1752520). 
- // Presumably because the folding 1 * x => x is done at the LIR - // level, not the MIR level, hence the now-pointless WasmParameter - // node is not DCE'd away, since DCE only happens at the MIR level. - // In fact all targets suffer from the latter problem, but on x86 - // no_prefix_x86:true hides it, and on arm32/64 the pointless move - // is correctly transformed by RA into a no-op. - `mov %edi, %ecx + {x64: `mov %edi, %ecx mov %ecx, %eax`, x86: `movl 0x10\\(%rbp\\), %eax`, arm64: ``, @@ -111,7 +118,9 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul32_twoL") (param $p1 i32) (result i32) (i32.mul (i32.const 2) (local.get $p1))))`, "mul32_twoL", - {x64: `lea \\(%rdi,%rdi,1\\), %eax`, + {x64: `mov %edi, %ecx + mov %ecx, %eax + add %eax, %eax`, x86: `movl 0x10\\(%rbp\\), %eax add %eax, %eax`, arm64: `add w0, w0, w0`, @@ -122,7 +131,9 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul64_twoL") (param $p1 i64) (result i64) (i64.mul (i64.const 2) (local.get $p1))))`, "mul64_twoL", - {x64: `lea \\(%rdi,%rdi,1\\), %rax`, + {x64: `mov %rdi, %rcx + mov %rcx, %rax + add %rax, %rax`, x86: `movl 0x14\\(%rbp\\), %edx movl 0x10\\(%rbp\\), %eax add %eax, %eax @@ -137,7 +148,9 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul32_fourL") (param $p1 i32) (result i32) (i32.mul (i32.const 4) (local.get $p1))))`, "mul32_fourL", - {x64: `lea \\(,%rdi,4\\), %eax`, + {x64: `mov %edi, %ecx + mov %ecx, %eax + shl \\$0x02, %eax`, x86: `movl 0x10\\(%rbp\\), %eax shl \\$0x02, %eax`, arm64: `lsl w0, w0, #2`, @@ -148,7 +161,9 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul64_fourL") (param $p1 i64) (result i64) (i64.mul (i64.const 4) (local.get $p1))))`, "mul64_fourL", - {x64: `lea \\(,%rdi,4\\), %rax`, + {x64: `mov %rdi, %rcx + mov %rcx, %rax + shl \\$0x02, %rax`, x86: `movl 0x14\\(%rbp\\), %edx movl 0x10\\(%rbp\\), %eax shld \\$0x02, %eax, %edx @@ -172,7 +187,9 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul32_zeroR") 
(param $p1 i32) (result i32) (i32.mul (local.get $p1) (i32.const 0))))`, "mul32_zeroR", - {x64: `xor %eax, %eax`, + {x64: `mov %edi, %ecx + mov %ecx, %eax + xor %eax, %eax`, x86: `xor %eax, %eax`, arm64: `mov w0, wzr`, arm: `mov r0, #0`}, @@ -182,7 +199,9 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul64_zeroR") (param $p1 i64) (result i64) (i64.mul (local.get $p1) (i64.const 0))))`, "mul64_zeroR", - {x64: `xor %rax, %rax`, // REX.W is redundant + {x64: `mov %rdi, %rcx + mov %rcx, %rax + xor %rax, %rax`, // REX.W is redundant x86: `xor %eax, %eax xor %edx, %edx`, arm64: `mov x0, xzr`, @@ -245,7 +264,9 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul32_twoR") (param $p1 i32) (result i32) (i32.mul (local.get $p1) (i32.const 2))))`, "mul32_twoR", - {x64: `lea \\(%rdi,%rdi,1\\), %eax`, + {x64: `mov %edi, %ecx + mov %ecx, %eax + add %eax, %eax`, x86: `movl 0x10\\(%rbp\\), %eax add %eax, %eax`, arm64: `add w0, w0, w0`, @@ -256,7 +277,9 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul64_twoR") (param $p1 i64) (result i64) (i64.mul (local.get $p1) (i64.const 2))))`, "mul64_twoR", - {x64: `lea \\(%rdi,%rdi,1\\), %rax`, + {x64: `mov %rdi, %rcx + mov %rcx, %rax + add %rax, %rax`, x86: `movl 0x14\\(%rbp\\), %edx movl 0x10\\(%rbp\\), %eax add %eax, %eax @@ -271,7 +294,9 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul32_fourR") (param $p1 i32) (result i32) (i32.mul (local.get $p1) (i32.const 4))))`, "mul32_fourR", - {x64: `lea \\(,%rdi,4\\), %eax`, + {x64: `mov %edi, %ecx + mov %ecx, %eax + shl \\$0x02, %eax`, x86: `movl 0x10\\(%rbp\\), %eax shl \\$0x02, %eax`, arm64: `lsl w0, w0, #2`, @@ -282,7 +307,9 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul64_fourR") (param $p1 i64) (result i64) (i64.mul (local.get $p1) (i64.const 4))))`, "mul64_fourR", - {x64: `lea \\(,%rdi,4\\), %rax`, + {x64: `mov %rdi, %rcx + mov %rcx, %rax + shl \\$0x02, %rax`, x86: `movl 0x14\\(%rbp\\), %edx movl 0x10\\(%rbp\\), %eax shld 
\\$0x02, %eax, %edx diff --git a/js/src/jit/LIROps.yaml b/js/src/jit/LIROps.yaml @@ -1351,7 +1351,7 @@ operands: lhs: Int64 rhs: Int64 -#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_ARM) +#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM) num_temps: 1 #endif defer_init: true diff --git a/js/src/jit/x64/Assembler-x64.h b/js/src/jit/x64/Assembler-x64.h @@ -1043,9 +1043,6 @@ class Assembler : public AssemblerX86Shared { masm.leaq_mr(src.disp(), src.base(), src.index(), src.scale(), dest.encoding()); break; - case Operand::MEM_SCALE_NOBASE: - masm.leaq_mr(src.disp(), src.index(), src.scale(), dest.encoding()); - break; default: MOZ_CRASH("unexepcted operand kind"); } diff --git a/js/src/jit/x64/BaseAssembler-x64.h b/js/src/jit/x64/BaseAssembler-x64.h @@ -711,12 +711,6 @@ class BaseAssemblerX64 : public BaseAssembler { m_formatter.oneByteOp64(OP_LEA, offset, base, index, scale, dst); } - void leaq_mr(int32_t offset, RegisterID index, int scale, RegisterID dst) { - spew("leaq " MEM_os ", %s", ADDR_os(offset, index, scale), - GPReg64Name(dst)); - m_formatter.oneByteOp64_disp32(OP_LEA, offset, index, scale, dst); - } - void movq_i32m(int32_t imm, int32_t offset, RegisterID base) { spew("movq $%d, " MEM_ob, imm, ADDR_ob(offset, base)); m_formatter.oneByteOp64(OP_GROUP11_EvIz, offset, base, GROUP11_MOV); diff --git a/js/src/jit/x64/CodeGenerator-x64.cpp b/js/src/jit/x64/CodeGenerator-x64.cpp @@ -122,81 +122,6 @@ void CodeGenerator::visitUnbox(LUnbox* unbox) { } } -void CodeGenerator::visitMulI64(LMulI64* lir) { - Register lhs = ToRegister64(lir->lhs()).reg; - LInt64Allocation rhs = lir->rhs(); - Register out = ToOutRegister64(lir).reg; - - if (IsConstant(rhs)) { - int64_t constant = ToInt64(rhs); - switch (constant) { - case -1: - if (lhs != out) { - masm.movq(lhs, out); - } - masm.negq(out); - break; - case 0: - masm.xorq(out, out); - break; - case 1: - if (lhs != out) { - masm.movq(lhs, out); - } - break; - case 2: - if (lhs == out) { - 
masm.addq(lhs, lhs); - } else { - masm.lea(Operand(lhs, lhs, TimesOne), out); - } - break; - case 3: - masm.lea(Operand(lhs, lhs, TimesTwo), out); - break; - case 4: - if (lhs == out) { - masm.shlq(Imm32(2), lhs); - } else { - masm.lea(Operand(lhs, TimesFour, 0), out); - } - break; - case 5: - masm.lea(Operand(lhs, lhs, TimesFour), out); - break; - case 8: - if (lhs == out) { - masm.shlq(Imm32(3), lhs); - } else { - masm.lea(Operand(lhs, TimesEight, 0), out); - } - break; - case 9: - masm.lea(Operand(lhs, lhs, TimesEight), out); - break; - default: { - // Use shift if constant is power of 2. - int32_t shift = mozilla::FloorLog2(constant); - if (constant > 0 && (1 << shift) == constant) { - if (lhs != out) { - masm.movq(lhs, out); - } - masm.shlq(Imm32(shift), out); - } else if (int32_t(constant) == constant) { - masm.imulq(Imm32(constant), lhs, out); - } else { - MOZ_ASSERT(out == lhs); - masm.mul64(Imm64(constant), Register64(lhs)); - } - break; - } - } - } else { - MOZ_ASSERT(out == lhs); - masm.imulq(ToOperandOrRegister64(rhs), lhs); - } -} - void CodeGenerator::visitDivOrModI64(LDivOrModI64* lir) { Register lhs = ToRegister(lir->lhs()); Register rhs = ToRegister(lir->rhs()); @@ -305,127 +230,6 @@ void CodeGeneratorX64::emitBigIntPtrMod(LBigIntPtrMod* ins, Register dividend, masm.idivq(divisor); } -void CodeGenerator::visitShiftIntPtr(LShiftIntPtr* ins) { - Register lhs = ToRegister(ins->lhs()); - const LAllocation* rhs = ins->rhs(); - Register out = ToRegister(ins->output()); - - if (rhs->isConstant()) { - MOZ_ASSERT(out == lhs); - - int32_t shift = ToIntPtr(rhs) & 0x3f; - switch (ins->bitop()) { - case JSOp::Lsh: - if (shift) { - masm.lshiftPtr(Imm32(shift), lhs); - } - break; - case JSOp::Rsh: - if (shift) { - masm.rshiftPtrArithmetic(Imm32(shift), lhs); - } - break; - case JSOp::Ursh: - if (shift) { - masm.rshiftPtr(Imm32(shift), lhs); - } - break; - default: - MOZ_CRASH("Unexpected shift op"); - } - } else { - Register shift = ToRegister(rhs); - 
MOZ_ASSERT_IF(out != lhs, Assembler::HasBMI2()); - - switch (ins->bitop()) { - case JSOp::Lsh: - if (out != lhs) { - masm.shlxq(lhs, shift, out); - } else { - masm.lshiftPtr(shift, lhs); - } - break; - case JSOp::Rsh: - if (out != lhs) { - masm.sarxq(lhs, shift, out); - } else { - masm.rshiftPtrArithmetic(shift, lhs); - } - break; - case JSOp::Ursh: - if (out != lhs) { - masm.shrxq(lhs, shift, out); - } else { - masm.rshiftPtr(shift, lhs); - } - break; - default: - MOZ_CRASH("Unexpected shift op"); - } - } -} - -void CodeGenerator::visitShiftI64(LShiftI64* lir) { - Register lhs = ToRegister64(lir->lhs()).reg; - const LAllocation* rhs = lir->rhs(); - Register out = ToOutRegister64(lir).reg; - - if (rhs->isConstant()) { - MOZ_ASSERT(out == lhs); - - int32_t shift = int32_t(rhs->toConstant()->toInt64() & 0x3F); - switch (lir->bitop()) { - case JSOp::Lsh: - if (shift) { - masm.lshiftPtr(Imm32(shift), lhs); - } - break; - case JSOp::Rsh: - if (shift) { - masm.rshiftPtrArithmetic(Imm32(shift), lhs); - } - break; - case JSOp::Ursh: - if (shift) { - masm.rshiftPtr(Imm32(shift), lhs); - } - break; - default: - MOZ_CRASH("Unexpected shift op"); - } - return; - } - - Register shift = ToRegister(rhs); - MOZ_ASSERT_IF(out != lhs, Assembler::HasBMI2()); - - switch (lir->bitop()) { - case JSOp::Lsh: - if (out != lhs) { - masm.shlxq(lhs, shift, out); - } else { - masm.lshiftPtr(shift, lhs); - } - break; - case JSOp::Rsh: - if (out != lhs) { - masm.sarxq(lhs, shift, out); - } else { - masm.rshiftPtrArithmetic(shift, lhs); - } - break; - case JSOp::Ursh: - if (out != lhs) { - masm.shrxq(lhs, shift, out); - } else { - masm.rshiftPtr(shift, lhs); - } - break; - default: - MOZ_CRASH("Unexpected shift op"); - } -} - void CodeGenerator::visitAtomicLoad64(LAtomicLoad64* lir) { Register elements = ToRegister(lir->elements()); Register64 out = ToOutRegister64(lir); diff --git a/js/src/jit/x64/Lowering-x64.cpp b/js/src/jit/x64/Lowering-x64.cpp @@ -62,67 +62,14 @@ void 
LIRGeneratorX64::lowerForALUInt64( void LIRGeneratorX64::lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs) { - // No input reuse needed when we can use imulq with an int32 immediate. - bool reuseInput = true; - if (rhs->isConstant()) { - int64_t constant = rhs->toConstant()->toInt64(); - reuseInput = int32_t(constant) != constant; - } - + // X64 doesn't need a temp for 64bit multiplication. ins->setLhs(useInt64RegisterAtStart(lhs)); ins->setRhs(willHaveDifferentLIRNodes(lhs, rhs) ? useInt64OrConstant(rhs) : useInt64OrConstantAtStart(rhs)); - if (reuseInput) { - defineInt64ReuseInput(ins, mir, 0); - } else { - defineInt64(ins, mir); - } -} - -template <class LInstr> -void LIRGeneratorX64::lowerForShiftInt64(LInstr* ins, MDefinition* mir, - MDefinition* lhs, MDefinition* rhs) { - if constexpr (std::is_same_v<LInstr, LShiftI64>) { - LAllocation rhsAlloc; - if (rhs->isConstant()) { - rhsAlloc = useOrConstantAtStart(rhs); - } else if (Assembler::HasBMI2()) { - rhsAlloc = useRegisterAtStart(rhs); - } else { - rhsAlloc = useShiftRegister(rhs); - } - - ins->setLhs(useInt64RegisterAtStart(lhs)); - ins->setRhs(rhsAlloc); - if (rhs->isConstant() || !Assembler::HasBMI2()) { - defineInt64ReuseInput(ins, mir, LShiftI64::LhsIndex); - } else { - defineInt64(ins, mir); - } - } else { - LAllocation rhsAlloc; - if (rhs->isConstant()) { - rhsAlloc = useOrConstantAtStart(rhs); - } else { - rhsAlloc = useFixed(rhs, rcx); - } - - ins->setInput(useInt64RegisterAtStart(lhs)); - ins->setCount(rhsAlloc); - defineInt64ReuseInput(ins, mir, LRotateI64::InputIndex); - } + defineInt64ReuseInput(ins, mir, 0); } -template void LIRGeneratorX64::lowerForShiftInt64(LShiftI64* ins, - MDefinition* mir, - MDefinition* lhs, - MDefinition* rhs); -template void LIRGeneratorX64::lowerForShiftInt64(LRotateI64* ins, - MDefinition* mir, - MDefinition* lhs, - MDefinition* rhs); - void LIRGenerator::visitBox(MBox* box) { MDefinition* opd = box->getOperand(0); diff --git 
a/js/src/jit/x64/Lowering-x64.h b/js/src/jit/x64/Lowering-x64.h @@ -31,10 +31,6 @@ class LIRGeneratorX64 : public LIRGeneratorX86Shared { void lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs); - template <class LInstr> - void lowerForShiftInt64(LInstr* ins, MDefinition* mir, MDefinition* lhs, - MDefinition* rhs); - // Returns a box allocation. reg2 is ignored on 64-bit platforms. LBoxAllocation useBoxFixed(MDefinition* mir, Register reg1, Register, bool useAtStart = false); diff --git a/js/src/jit/x86-shared/BaseAssembler-x86-shared.h b/js/src/jit/x86-shared/BaseAssembler-x86-shared.h @@ -5998,14 +5998,6 @@ class BaseAssembler : public GenericAssembler { memoryModRM(offset, base, index, scale, reg); } - void oneByteOp64_disp32(OneByteOpcodeID opcode, int32_t offset, - RegisterID index, int scale, int reg) { - m_buffer.ensureSpace(MaxInstructionSize); - emitRexW(reg, index, 0); - m_buffer.putByteUnchecked(opcode); - memoryModRM_disp32(offset, index, scale, reg); - } - void oneByteOp64(OneByteOpcodeID opcode, const void* address, int reg) { m_buffer.ensureSpace(MaxInstructionSize); emitRexW(reg, 0, 0); diff --git a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp @@ -662,21 +662,20 @@ void CodeGeneratorX86Shared::emitUndoALUOperationOOL(LInstruction* ins) { } void CodeGenerator::visitAddI(LAddI* ins) { - Register lhs = ToRegister(ins->lhs()); - const LAllocation* rhs = ins->rhs(); - Register out = ToRegister(ins->output()); - - if (rhs->isConstant()) { - if (lhs != out) { + if (ins->rhs()->isConstant()) { + if (MOZ_UNLIKELY(ins->numDefs() == 1 && + ins->getDef(0)->policy() != + LDefinition::MUST_REUSE_INPUT && + ToRegister(ins->lhs()) != ToRegister(ins->output()))) { MOZ_ASSERT(!ins->snapshot()); // Special case to lower the add to LEA instruction. 
- masm.add32(Imm32(ToInt32(rhs)), lhs, out); + masm.add32(Imm32(ToInt32(ins->rhs())), ToRegister(ins->lhs()), + ToRegister(ins->output())); } else { - masm.addl(Imm32(ToInt32(rhs)), lhs); + masm.addl(Imm32(ToInt32(ins->rhs())), ToOperand(ins->lhs())); } } else { - MOZ_ASSERT(out == lhs); - masm.addl(ToOperand(rhs), lhs); + masm.addl(ToOperand(ins->rhs()), ToRegister(ins->lhs())); } if (ins->snapshot()) { @@ -692,29 +691,24 @@ void CodeGenerator::visitAddI(LAddI* ins) { } void CodeGenerator::visitAddI64(LAddI64* lir) { - Register64 lhs = ToRegister64(lir->lhs()); + LInt64Allocation lhs = lir->lhs(); LInt64Allocation rhs = lir->rhs(); - MOZ_ASSERT(ToOutRegister64(lir) == lhs); + MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs)); if (IsConstant(rhs)) { - masm.add64(Imm64(ToInt64(rhs)), lhs); + masm.add64(Imm64(ToInt64(rhs)), ToRegister64(lhs)); return; } - masm.add64(ToOperandOrRegister64(rhs), lhs); + masm.add64(ToOperandOrRegister64(rhs), ToRegister64(lhs)); } void CodeGenerator::visitSubI(LSubI* ins) { - Register lhs = ToRegister(ins->lhs()); - const LAllocation* rhs = ins->rhs(); - - MOZ_ASSERT(ToRegister(ins->output()) == lhs); - - if (rhs->isConstant()) { - masm.subl(Imm32(ToInt32(rhs)), lhs); + if (ins->rhs()->isConstant()) { + masm.subl(Imm32(ToInt32(ins->rhs())), ToOperand(ins->lhs())); } else { - masm.subl(ToOperand(rhs), lhs); + masm.subl(ToOperand(ins->rhs()), ToRegister(ins->lhs())); } if (ins->snapshot()) { @@ -744,10 +738,8 @@ void CodeGenerator::visitSubI64(LSubI64* lir) { } void CodeGenerator::visitMulI(LMulI* ins) { - Register lhs = ToRegister(ins->lhs()); + const LAllocation* lhs = ins->lhs(); const LAllocation* rhs = ins->rhs(); - Register out = ToRegister(ins->output()); - MMul* mul = ins->mir(); MOZ_ASSERT_IF(mul->mode() == MMul::Integer, !mul->canBeNegativeZero() && !mul->canOverflow()); @@ -758,78 +750,33 @@ void CodeGenerator::visitMulI(LMulI* ins) { if (mul->canBeNegativeZero() && constant <= 0) { Assembler::Condition bailoutCond = 
(constant == 0) ? Assembler::Signed : Assembler::Equal; - masm.test32(lhs, lhs); + masm.test32(ToRegister(lhs), ToRegister(lhs)); bailoutIf(bailoutCond, ins->snapshot()); } - if (!mul->canOverflow()) { - switch (constant) { - case 2: - if (lhs == out) { - masm.addl(lhs, lhs); - } else { - masm.leal(Operand(lhs, lhs, TimesOne), out); - } - return; - case 3: - masm.leal(Operand(lhs, lhs, TimesTwo), out); - return; - case 4: - if (lhs == out) { - masm.shll(Imm32(2), lhs); - } else { - masm.leal(Operand(lhs, TimesFour, 0), out); - } - return; - case 5: - masm.leal(Operand(lhs, lhs, TimesFour), out); - return; - case 8: - if (lhs == out) { - masm.shll(Imm32(3), lhs); - } else { - masm.leal(Operand(lhs, TimesEight, 0), out); - } - return; - case 9: - masm.leal(Operand(lhs, lhs, TimesEight), out); - return; - default: - // Use shift if cannot overflow and constant is power of 2 - int32_t shift = FloorLog2(constant); - if (constant > 0 && (1 << shift) == constant) { - if (lhs != out) { - masm.movl(lhs, out); - } - masm.shll(Imm32(shift), out); - return; - } - } - } - switch (constant) { case -1: - if (lhs != out) { - masm.movl(lhs, out); - } - masm.negl(out); + masm.negl(ToOperand(lhs)); break; case 0: - masm.xorl(out, out); + masm.xorl(ToOperand(lhs), ToRegister(lhs)); return; // escape overflow check; case 1: - if (lhs != out) { - masm.movl(lhs, out); - } + // nop return; // escape overflow check; case 2: - if (lhs == out) { - masm.addl(lhs, lhs); - break; - } - [[fallthrough]]; + masm.addl(ToOperand(lhs), ToRegister(lhs)); + break; default: - masm.imull(Imm32(constant), lhs, out); + if (!mul->canOverflow() && constant > 0) { + // Use shift if cannot overflow and constant is power of 2 + int32_t shift = FloorLog2(constant); + if ((1 << shift) == constant) { + masm.shll(Imm32(shift), ToRegister(lhs)); + return; + } + } + masm.imull(Imm32(ToInt32(rhs)), ToRegister(lhs)); } // Bailout on overflow @@ -837,9 +784,7 @@ void CodeGenerator::visitMulI(LMulI* ins) { 
bailoutIf(Assembler::Overflow, ins->snapshot()); } } else { - MOZ_ASSERT(out == lhs); - - masm.imull(ToOperand(rhs), lhs); + masm.imull(ToOperand(rhs), ToRegister(lhs)); // Bailout on overflow if (mul->canOverflow()) { @@ -865,13 +810,52 @@ void CodeGenerator::visitMulI(LMulI* ins) { }); addOutOfLineCode(ool, mul); - masm.test32(lhs, lhs); + masm.test32(ToRegister(lhs), ToRegister(lhs)); masm.j(Assembler::Zero, ool->entry()); masm.bind(ool->rejoin()); } } } +void CodeGenerator::visitMulI64(LMulI64* lir) { + LInt64Allocation lhs = lir->lhs(); + LInt64Allocation rhs = lir->rhs(); + + MOZ_ASSERT(ToRegister64(lhs) == ToOutRegister64(lir)); + + if (IsConstant(rhs)) { + int64_t constant = ToInt64(rhs); + switch (constant) { + case -1: + masm.neg64(ToRegister64(lhs)); + return; + case 0: + masm.xor64(ToRegister64(lhs), ToRegister64(lhs)); + return; + case 1: + // nop + return; + case 2: + masm.add64(ToRegister64(lhs), ToRegister64(lhs)); + return; + default: + if (constant > 0) { + // Use shift if constant is power of 2. 
+ int32_t shift = mozilla::FloorLog2(constant); + if (int64_t(1) << shift == constant) { + masm.lshift64(Imm32(shift), ToRegister64(lhs)); + return; + } + } + Register temp = ToTempRegisterOrInvalid(lir->temp0()); + masm.mul64(Imm64(constant), ToRegister64(lhs), temp); + } + } else { + Register temp = ToTempRegisterOrInvalid(lir->temp0()); + masm.mul64(ToOperandOrRegister64(rhs), ToRegister64(lhs), temp); + } +} + void CodeGenerator::visitUDivOrMod(LUDivOrMod* ins) { Register lhs = ToRegister(ins->lhs()); Register rhs = ToRegister(ins->rhs()); @@ -1453,38 +1437,36 @@ void CodeGenerator::visitModI(LModI* ins) { } void CodeGenerator::visitBitNotI(LBitNotI* ins) { - Register input = ToRegister(ins->input()); - MOZ_ASSERT(input == ToRegister(ins->output())); + const LAllocation* input = ins->input(); + MOZ_ASSERT(!input->isConstant()); - masm.notl(input); + masm.notl(ToOperand(input)); } void CodeGenerator::visitBitOpI(LBitOpI* ins) { - Register lhs = ToRegister(ins->lhs()); + const LAllocation* lhs = ins->lhs(); const LAllocation* rhs = ins->rhs(); - MOZ_ASSERT(lhs == ToRegister(ins->output())); - switch (ins->bitop()) { case JSOp::BitOr: if (rhs->isConstant()) { - masm.orl(Imm32(ToInt32(rhs)), lhs); + masm.orl(Imm32(ToInt32(rhs)), ToOperand(lhs)); } else { - masm.orl(ToOperand(rhs), lhs); + masm.orl(ToOperand(rhs), ToRegister(lhs)); } break; case JSOp::BitXor: if (rhs->isConstant()) { - masm.xorl(Imm32(ToInt32(rhs)), lhs); + masm.xorl(Imm32(ToInt32(rhs)), ToOperand(lhs)); } else { - masm.xorl(ToOperand(rhs), lhs); + masm.xorl(ToOperand(rhs), ToRegister(lhs)); } break; case JSOp::BitAnd: if (rhs->isConstant()) { - masm.andl(Imm32(ToInt32(rhs)), lhs); + masm.andl(Imm32(ToInt32(rhs)), ToOperand(lhs)); } else { - masm.andl(ToOperand(rhs), lhs); + masm.andl(ToOperand(rhs), ToRegister(lhs)); } break; default: @@ -1493,31 +1475,31 @@ void CodeGenerator::visitBitOpI(LBitOpI* ins) { } void CodeGenerator::visitBitOpI64(LBitOpI64* lir) { - Register64 lhs = 
ToRegister64(lir->lhs()); + LInt64Allocation lhs = lir->lhs(); LInt64Allocation rhs = lir->rhs(); - MOZ_ASSERT(ToOutRegister64(lir) == lhs); + MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs)); switch (lir->bitop()) { case JSOp::BitOr: if (IsConstant(rhs)) { - masm.or64(Imm64(ToInt64(rhs)), lhs); + masm.or64(Imm64(ToInt64(rhs)), ToRegister64(lhs)); } else { - masm.or64(ToOperandOrRegister64(rhs), lhs); + masm.or64(ToOperandOrRegister64(rhs), ToRegister64(lhs)); } break; case JSOp::BitXor: if (IsConstant(rhs)) { - masm.xor64(Imm64(ToInt64(rhs)), lhs); + masm.xor64(Imm64(ToInt64(rhs)), ToRegister64(lhs)); } else { - masm.xor64(ToOperandOrRegister64(rhs), lhs); + masm.xor64(ToOperandOrRegister64(rhs), ToRegister64(lhs)); } break; case JSOp::BitAnd: if (IsConstant(rhs)) { - masm.and64(Imm64(ToInt64(rhs)), lhs); + masm.and64(Imm64(ToInt64(rhs)), ToRegister64(lhs)); } else { - masm.and64(ToOperandOrRegister64(rhs), lhs); + masm.and64(ToOperandOrRegister64(rhs), ToRegister64(lhs)); } break; default: @@ -1528,11 +1510,8 @@ void CodeGenerator::visitBitOpI64(LBitOpI64* lir) { void CodeGenerator::visitShiftI(LShiftI* ins) { Register lhs = ToRegister(ins->lhs()); const LAllocation* rhs = ins->rhs(); - Register out = ToRegister(ins->output()); if (rhs->isConstant()) { - MOZ_ASSERT(out == lhs); - int32_t shift = ToInt32(rhs) & 0x1F; switch (ins->bitop()) { case JSOp::Lsh: @@ -1559,66 +1538,138 @@ void CodeGenerator::visitShiftI(LShiftI* ins) { } } else { Register shift = ToRegister(rhs); - MOZ_ASSERT_IF(out != lhs, Assembler::HasBMI2()); + switch (ins->bitop()) { + case JSOp::Lsh: + masm.lshift32(shift, lhs); + break; + case JSOp::Rsh: + masm.rshift32Arithmetic(shift, lhs); + break; + case JSOp::Ursh: + masm.rshift32(shift, lhs); + if (ins->mir()->toUrsh()->fallible()) { + // x >>> 0 can overflow. 
+ masm.test32(lhs, lhs); + bailoutIf(Assembler::Signed, ins->snapshot()); + } + break; + default: + MOZ_CRASH("Unexpected shift op"); + } + } +} + +void CodeGenerator::visitShiftIntPtr(LShiftIntPtr* ins) { + Register lhs = ToRegister(ins->lhs()); + const LAllocation* rhs = ins->rhs(); + if (rhs->isConstant()) { + constexpr intptr_t mask = (sizeof(intptr_t) * CHAR_BIT) - 1; + int32_t shift = ToIntPtr(rhs) & mask; switch (ins->bitop()) { case JSOp::Lsh: - if (out != lhs) { - masm.shlxl(lhs, shift, out); - } else { - masm.lshift32(shift, lhs); + if (shift) { + masm.lshiftPtr(Imm32(shift), lhs); } break; case JSOp::Rsh: - if (out != lhs) { - masm.sarxl(lhs, shift, out); - } else { - masm.rshift32Arithmetic(shift, lhs); + if (shift) { + masm.rshiftPtrArithmetic(Imm32(shift), lhs); } break; case JSOp::Ursh: - if (out != lhs) { - masm.shrxl(lhs, shift, out); - } else { - masm.rshift32(shift, lhs); + if (shift) { + masm.rshiftPtr(Imm32(shift), lhs); } - if (ins->mir()->toUrsh()->fallible()) { - // x >>> 0 can overflow. 
- masm.test32(out, out); - bailoutIf(Assembler::Signed, ins->snapshot()); + break; + default: + MOZ_CRASH("Unexpected shift op"); + } + } else { + Register shift = ToRegister(rhs); + switch (ins->bitop()) { + case JSOp::Lsh: + masm.lshiftPtr(shift, lhs); + break; + case JSOp::Rsh: + masm.rshiftPtrArithmetic(shift, lhs); + break; + case JSOp::Ursh: + masm.rshiftPtr(shift, lhs); + break; + default: + MOZ_CRASH("Unexpected shift op"); + } + } +} + +void CodeGenerator::visitShiftI64(LShiftI64* lir) { + LInt64Allocation lhs = lir->lhs(); + const LAllocation* rhs = lir->rhs(); + + MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs)); + + if (rhs->isConstant()) { + int32_t shift = int32_t(rhs->toConstant()->toInt64() & 0x3F); + switch (lir->bitop()) { + case JSOp::Lsh: + if (shift) { + masm.lshift64(Imm32(shift), ToRegister64(lhs)); + } + break; + case JSOp::Rsh: + if (shift) { + masm.rshift64Arithmetic(Imm32(shift), ToRegister64(lhs)); + } + break; + case JSOp::Ursh: + if (shift) { + masm.rshift64(Imm32(shift), ToRegister64(lhs)); } break; default: MOZ_CRASH("Unexpected shift op"); } + return; + } + + Register shift = ToRegister(rhs); +#ifdef JS_CODEGEN_X86 + MOZ_ASSERT(shift == ecx); +#endif + switch (lir->bitop()) { + case JSOp::Lsh: + masm.lshift64(shift, ToRegister64(lhs)); + break; + case JSOp::Rsh: + masm.rshift64Arithmetic(shift, ToRegister64(lhs)); + break; + case JSOp::Ursh: + masm.rshift64(shift, ToRegister64(lhs)); + break; + default: + MOZ_CRASH("Unexpected shift op"); } } void CodeGenerator::visitUrshD(LUrshD* ins) { Register lhs = ToRegister(ins->lhs()); + MOZ_ASSERT(ToRegister(ins->temp0()) == lhs); + const LAllocation* rhs = ins->rhs(); FloatRegister out = ToFloatRegister(ins->output()); - Register temp = ToRegister(ins->temp0()); if (rhs->isConstant()) { - MOZ_ASSERT(temp == lhs); - int32_t shift = ToInt32(rhs) & 0x1F; if (shift) { masm.shrl(Imm32(shift), lhs); } } else { - MOZ_ASSERT_IF(temp != lhs, Assembler::HasBMI2()); - Register shift = 
ToRegister(rhs); - if (temp != lhs) { - masm.shrxl(lhs, shift, temp); - } else { - masm.rshift32(shift, lhs); - } + masm.rshift32(shift, lhs); } - masm.convertUInt32ToDouble(temp, out); + masm.convertUInt32ToDouble(lhs, out); } Operand CodeGeneratorX86Shared::ToOperand(const LAllocation& a) { @@ -1635,6 +1686,10 @@ Operand CodeGeneratorX86Shared::ToOperand(const LAllocation* a) { return ToOperand(*a); } +Operand CodeGeneratorX86Shared::ToOperand(const LDefinition* def) { + return ToOperand(def->output()); +} + MoveOperand CodeGeneratorX86Shared::toMoveOperand(LAllocation a) const { if (a.isGeneralReg()) { return MoveOperand(ToRegister(a)); @@ -1813,24 +1868,16 @@ void CodeGenerator::visitNegI64(LNegI64* ins) { void CodeGenerator::visitNegD(LNegD* ins) { FloatRegister input = ToFloatRegister(ins->input()); - FloatRegister output = ToFloatRegister(ins->output()); + MOZ_ASSERT(input == ToFloatRegister(ins->output())); - ScratchDoubleScope scratch(masm); - masm.loadConstantDouble(-0.0, scratch); - - // XOR the float in a float register with -0.0. - masm.vxorpd(scratch, input, output); // s ^ 0x80000000000000 + masm.negateDouble(input); } void CodeGenerator::visitNegF(LNegF* ins) { FloatRegister input = ToFloatRegister(ins->input()); - FloatRegister output = ToFloatRegister(ins->output()); - - ScratchDoubleScope scratch(masm); - masm.loadConstantFloat32(-0.0f, scratch); + MOZ_ASSERT(input == ToFloatRegister(ins->output())); - // XOR the float in a float register with -0.0. 
- masm.vxorpd(scratch, input, output); // s ^ 0x80000000000000 + masm.negateFloat(input); } void CodeGenerator::visitCompareExchangeTypedArrayElement( diff --git a/js/src/jit/x86-shared/CodeGenerator-x86-shared.h b/js/src/jit/x86-shared/CodeGenerator-x86-shared.h @@ -35,6 +35,7 @@ class CodeGeneratorX86Shared : public CodeGeneratorShared { Operand ToOperand(const LAllocation& a); Operand ToOperand(const LAllocation* a); + Operand ToOperand(const LDefinition* def); #ifdef JS_PUNBOX64 Operand ToOperandOrRegister64(const LInt64Allocation& input); diff --git a/js/src/jit/x86-shared/Lowering-x86-shared.cpp b/js/src/jit/x86-shared/Lowering-x86-shared.cpp @@ -68,25 +68,64 @@ void LIRGeneratorX86Shared::lowerForShift(LInstructionHelper<1, 2, 0>* ins, if (rhs->isConstant()) { ins->setOperand(1, useOrConstantAtStart(rhs)); - defineReuseInput(ins, mir, 0); } else if (!mir->isRotate()) { - if (Assembler::HasBMI2()) { - ins->setOperand(1, useRegisterAtStart(rhs)); - define(ins, mir); - } else { - ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs) - ? useShiftRegister(rhs) - : useShiftRegisterAtStart(rhs)); - defineReuseInput(ins, mir, 0); - } + ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs) + ? useShiftRegister(rhs) + : useShiftRegisterAtStart(rhs)); } else { ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs) ? useFixed(rhs, ecx) : useFixedAtStart(rhs, ecx)); - defineReuseInput(ins, mir, 0); } + + defineReuseInput(ins, mir, 0); } +template <class LInstr> +void LIRGeneratorX86Shared::lowerForShiftInt64(LInstr* ins, MDefinition* mir, + MDefinition* lhs, + MDefinition* rhs) { + LAllocation rhsAlloc; + if (rhs->isConstant()) { + rhsAlloc = useOrConstantAtStart(rhs); +#ifdef JS_CODEGEN_X64 + } else if (std::is_same_v<LInstr, LShiftI64>) { + rhsAlloc = useShiftRegister(rhs); + } else { + rhsAlloc = useFixed(rhs, rcx); + } +#else + } else { + // The operands are int64, but we only care about the lower 32 bits of + // the RHS. 
On 32-bit, the code below will load that part in ecx and + // will discard the upper half. + rhsAlloc = useLowWordFixed(rhs, ecx); + } +#endif + + if constexpr (std::is_same_v<LInstr, LShiftI64>) { + ins->setLhs(useInt64RegisterAtStart(lhs)); + ins->setRhs(rhsAlloc); + defineInt64ReuseInput(ins, mir, LShiftI64::LhsIndex); + } else { + ins->setInput(useInt64RegisterAtStart(lhs)); + ins->setCount(rhsAlloc); +#if defined(JS_NUNBOX32) + ins->setTemp0(temp()); +#endif + defineInt64ReuseInput(ins, mir, LRotateI64::InputIndex); + } +} + +template void LIRGeneratorX86Shared::lowerForShiftInt64(LShiftI64* ins, + MDefinition* mir, + MDefinition* lhs, + MDefinition* rhs); +template void LIRGeneratorX86Shared::lowerForShiftInt64(LRotateI64* ins, + MDefinition* mir, + MDefinition* lhs, + MDefinition* rhs); + void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir, MDefinition* input) { ins->setOperand(0, useRegisterAtStart(input)); @@ -96,35 +135,26 @@ void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 1, 0>* ins, void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, MDefinition* lhs, MDefinition* rhs) { + ins->setOperand(0, useRegisterAtStart(lhs)); + ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs) + ? useOrConstant(rhs) + : useOrConstantAtStart(rhs)); if (MOZ_UNLIKELY(mir->isAdd() && mir->type() == MIRType::Int32 && - rhs->isConstant() && !mir->toAdd()->fallible())) { + mir->getOperand(1)->isConstant() && + !mir->toAdd()->fallible())) { // Special case instruction that is widely used in Wasm during address // calculation. And x86 platform has LEA instruction for it. // See CodeGenerator::visitAddI for codegen. - ins->setOperand(0, useRegisterAtStart(lhs)); - ins->setOperand(1, useOrConstantAtStart(rhs)); define(ins, mir); return; } - - ins->setOperand(0, useRegisterAtStart(lhs)); - ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs) - ? 
useOrConstant(rhs) - : useOrConstantAtStart(rhs)); defineReuseInput(ins, mir, 0); } void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir, MDefinition* input) { - // Without AVX, we'll need to use the x86 encodings where the input must be - // the same location as the output. - if (!Assembler::HasAVX()) { - ins->setOperand(0, useRegisterAtStart(input)); - defineReuseInput(ins, mir, 0); - } else { - ins->setOperand(0, useRegisterAtStart(input)); - define(ins, mir); - } + ins->setOperand(0, useRegisterAtStart(input)); + defineReuseInput(ins, mir, 0); } void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 0>* ins, @@ -146,21 +176,12 @@ void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 0>* ins, void LIRGeneratorX86Shared::lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs) { - if (rhs->isConstant()) { - auto* lir = new (alloc()) LMulI(useRegisterAtStart(lhs), - useOrConstantAtStart(rhs), LAllocation()); - if (mul->fallible()) { - assignSnapshot(lir, mul->bailoutKind()); - } - define(lir, mul); - return; - } - // Note: If we need a negative zero check, lhs is used twice. LAllocation lhsCopy = mul->canBeNegativeZero() ? use(lhs) : LAllocation(); LMulI* lir = new (alloc()) LMulI(useRegisterAtStart(lhs), - willHaveDifferentLIRNodes(lhs, rhs) ? use(rhs) : useAtStart(rhs), + willHaveDifferentLIRNodes(lhs, rhs) ? 
useOrConstant(rhs) + : useOrConstantAtStart(rhs), lhsCopy); if (mul->fallible()) { assignSnapshot(lir, mul->bailoutKind()); @@ -433,21 +454,19 @@ void LIRGeneratorX86Shared::lowerUrshD(MUrsh* mir) { MOZ_ASSERT(rhs->type() == MIRType::Int32); MOZ_ASSERT(mir->type() == MIRType::Double); +#ifdef JS_CODEGEN_X64 + static_assert(ecx == rcx); +#endif + LUse lhsUse = useRegisterAtStart(lhs); LAllocation rhsAlloc; - LDefinition tempDef; if (rhs->isConstant()) { rhsAlloc = useOrConstant(rhs); - tempDef = tempCopy(lhs, 0); - } else if (Assembler::HasBMI2()) { - rhsAlloc = useRegisterAtStart(rhs); - tempDef = temp(); } else { rhsAlloc = useShiftRegister(rhs); - tempDef = tempCopy(lhs, 0); } - auto* lir = new (alloc()) LUrshD(lhsUse, rhsAlloc, tempDef); + LUrshD* lir = new (alloc()) LUrshD(lhsUse, rhsAlloc, tempCopy(lhs, 0)); define(lir, mir); } diff --git a/js/src/jit/x86-shared/Lowering-x86-shared.h b/js/src/jit/x86-shared/Lowering-x86-shared.h @@ -32,6 +32,10 @@ class LIRGeneratorX86Shared : public LIRGeneratorShared { void lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, MDefinition* lhs, MDefinition* rhs); + template <class LInstr> + void lowerForShiftInt64(LInstr* ins, MDefinition* mir, MDefinition* lhs, + MDefinition* rhs); + void lowerForFPU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir, MDefinition* input); void lowerForFPU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h @@ -293,15 +293,18 @@ void MacroAssembler::neg32(Register reg) { negl(reg); } void MacroAssembler::negateFloat(FloatRegister reg) { ScratchFloat32Scope scratch(*this); - loadConstantFloat32(-0.0f, scratch); + vpcmpeqw(Operand(scratch), scratch, scratch); + vpsllq(Imm32(31), scratch, scratch); // XOR the float in a float register with -0.0. 
vxorps(scratch, reg, reg); // s ^ 0x80000000 } void MacroAssembler::negateDouble(FloatRegister reg) { + // From MacroAssemblerX86Shared::maybeInlineDouble ScratchDoubleScope scratch(*this); - loadConstantDouble(-0.0, scratch); + vpcmpeqw(Operand(scratch), scratch, scratch); + vpsllq(Imm32(63), scratch, scratch); // XOR the float in a float register with -0.0. vxorpd(scratch, reg, reg); // s ^ 0x80000000000000 diff --git a/js/src/jit/x86/CodeGenerator-x86.cpp b/js/src/jit/x86/CodeGenerator-x86.cpp @@ -813,45 +813,6 @@ void CodeGeneratorX86::visitOutOfLineTruncateFloat32( masm.jump(ool->rejoin()); } -void CodeGenerator::visitMulI64(LMulI64* lir) { - Register64 lhs = ToRegister64(lir->lhs()); - LInt64Allocation rhs = lir->rhs(); - - MOZ_ASSERT(ToOutRegister64(lir) == lhs); - - if (IsConstant(rhs)) { - int64_t constant = ToInt64(rhs); - switch (constant) { - case -1: - masm.neg64(lhs); - return; - case 0: - masm.xor64(lhs, lhs); - return; - case 1: - // nop - return; - case 2: - masm.add64(lhs, lhs); - return; - default: - if (constant > 0) { - // Use shift if constant is power of 2. 
- int32_t shift = mozilla::FloorLog2(constant); - if (int64_t(1) << shift == constant) { - masm.lshift64(Imm32(shift), lhs); - return; - } - } - Register temp = ToTempRegisterOrInvalid(lir->temp0()); - masm.mul64(Imm64(constant), lhs, temp); - } - } else { - Register temp = ToTempRegisterOrInvalid(lir->temp0()); - masm.mul64(ToOperandOrRegister64(rhs), lhs, temp); - } -} - void CodeGenerator::visitDivOrModI64(LDivOrModI64* lir) { MOZ_ASSERT(gen->compilingWasm()); MOZ_ASSERT(ToRegister(lir->instance()) == InstanceReg); @@ -996,113 +957,6 @@ void CodeGeneratorX86::emitBigIntPtrMod(LBigIntPtrMod* ins, Register dividend, masm.idiv(divisor); } -void CodeGenerator::visitShiftIntPtr(LShiftIntPtr* ins) { - Register lhs = ToRegister(ins->lhs()); - const LAllocation* rhs = ins->rhs(); - Register out = ToRegister(ins->output()); - - if (rhs->isConstant()) { - MOZ_ASSERT(out == lhs); - - int32_t shift = ToIntPtr(rhs) & 0x1F; - switch (ins->bitop()) { - case JSOp::Lsh: - if (shift) { - masm.lshiftPtr(Imm32(shift), lhs); - } - break; - case JSOp::Rsh: - if (shift) { - masm.rshiftPtrArithmetic(Imm32(shift), lhs); - } - break; - case JSOp::Ursh: - if (shift) { - masm.rshiftPtr(Imm32(shift), lhs); - } - break; - default: - MOZ_CRASH("Unexpected shift op"); - } - } else { - Register shift = ToRegister(rhs); - MOZ_ASSERT_IF(out != lhs, Assembler::HasBMI2()); - - switch (ins->bitop()) { - case JSOp::Lsh: - if (out != lhs) { - masm.shlxl(lhs, shift, out); - } else { - masm.lshiftPtr(shift, lhs); - } - break; - case JSOp::Rsh: - if (out != lhs) { - masm.sarxl(lhs, shift, out); - } else { - masm.rshiftPtrArithmetic(shift, lhs); - } - break; - case JSOp::Ursh: - if (out != lhs) { - masm.shrxl(lhs, shift, out); - } else { - masm.rshiftPtr(shift, lhs); - } - break; - default: - MOZ_CRASH("Unexpected shift op"); - } - } -} - -void CodeGenerator::visitShiftI64(LShiftI64* lir) { - Register64 lhs = ToRegister64(lir->lhs()); - const LAllocation* rhs = lir->rhs(); - - 
MOZ_ASSERT(ToOutRegister64(lir) == lhs); - - if (rhs->isConstant()) { - int32_t shift = int32_t(rhs->toConstant()->toInt64() & 0x3F); - switch (lir->bitop()) { - case JSOp::Lsh: - if (shift) { - masm.lshift64(Imm32(shift), lhs); - } - break; - case JSOp::Rsh: - if (shift) { - masm.rshift64Arithmetic(Imm32(shift), lhs); - } - break; - case JSOp::Ursh: - if (shift) { - masm.rshift64(Imm32(shift), lhs); - } - break; - default: - MOZ_CRASH("Unexpected shift op"); - } - return; - } - - Register shift = ToRegister(rhs); - MOZ_ASSERT(shift == ecx); - switch (lir->bitop()) { - case JSOp::Lsh: - masm.lshift64(shift, lhs); - break; - case JSOp::Rsh: - masm.rshift64Arithmetic(shift, lhs); - break; - case JSOp::Ursh: - masm.rshift64(shift, lhs); - break; - default: - MOZ_CRASH("Unexpected shift op"); - } -} - void CodeGenerator::visitWasmSelectI64(LWasmSelectI64* lir) { MOZ_ASSERT(lir->mir()->type() == MIRType::Int64); diff --git a/js/src/jit/x86/Lowering-x86.cpp b/js/src/jit/x86/Lowering-x86.cpp @@ -226,40 +226,6 @@ void LIRGeneratorX86::lowerForMulInt64(LMulI64* ins, MMul* mir, LAllocation(AnyRegister(eax)))); } -template <class LInstr> -void LIRGeneratorX86::lowerForShiftInt64(LInstr* ins, MDefinition* mir, - MDefinition* lhs, MDefinition* rhs) { - LAllocation rhsAlloc; - if (rhs->isConstant()) { - rhsAlloc = useOrConstantAtStart(rhs); - } else { - // The operands are int64, but we only care about the lower 32 bits of the - // RHS. The code below will load that part in ecx and will discard the upper - // half. 
- rhsAlloc = useLowWordFixed(rhs, ecx); - } - - if constexpr (std::is_same_v<LInstr, LShiftI64>) { - ins->setLhs(useInt64RegisterAtStart(lhs)); - ins->setRhs(rhsAlloc); - defineInt64ReuseInput(ins, mir, LShiftI64::LhsIndex); - } else { - ins->setInput(useInt64RegisterAtStart(lhs)); - ins->setCount(rhsAlloc); - ins->setTemp0(temp()); - defineInt64ReuseInput(ins, mir, LRotateI64::InputIndex); - } -} - -template void LIRGeneratorX86::lowerForShiftInt64(LShiftI64* ins, - MDefinition* mir, - MDefinition* lhs, - MDefinition* rhs); -template void LIRGeneratorX86::lowerForShiftInt64(LRotateI64* ins, - MDefinition* mir, - MDefinition* lhs, - MDefinition* rhs); - void LIRGenerator::visitCompareExchangeTypedArrayElement( MCompareExchangeTypedArrayElement* ins) { MOZ_ASSERT(ins->elements()->type() == MIRType::Elements); diff --git a/js/src/jit/x86/Lowering-x86.h b/js/src/jit/x86/Lowering-x86.h @@ -50,10 +50,6 @@ class LIRGeneratorX86 : public LIRGeneratorX86Shared { void lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs); - template <class LInstr> - void lowerForShiftInt64(LInstr* ins, MDefinition* mir, MDefinition* lhs, - MDefinition* rhs); - void lowerTruncateDToInt32(MTruncateToInt32* ins); void lowerTruncateFToInt32(MTruncateToInt32* ins); void lowerBuiltinInt64ToFloatingPoint(MBuiltinInt64ToFloatingPoint* ins);