tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 130494460295b72c62f2d7c6763cb00c89189a85
parent 979b5c3d12aa2503aabdff77ec4c8f71ed3b9d37
Author: André Bargull <andre.bargull@gmail.com>
Date:   Mon, 27 Oct 2025 09:00:01 +0000

Bug 1996345 - Part 4: Use three operand imul instruction for Int64 when possible. r=spidermonkey-reviewers,iain

Applies the changes from part 1 for Int64 multiplication.

Differential Revision: https://phabricator.services.mozilla.com/D270024

Diffstat:
Mjs/src/jit-test/tests/wasm/binop-x64-ion-codegen.js | 50+++++++++++++++++++++++++++++++++++++++++++-------
Mjs/src/jit-test/tests/wasm/ion-adhoc-multiplatform.js | 43+++++++++++++++----------------------------
Mjs/src/jit/LIROps.yaml | 2+-
Mjs/src/jit/x64/CodeGenerator-x64.cpp | 80+++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
Mjs/src/jit/x64/Lowering-x64.cpp | 14++++++++++++--
5 files changed, 129 insertions(+), 60 deletions(-)

diff --git a/js/src/jit-test/tests/wasm/binop-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/binop-x64-ion-codegen.js @@ -47,7 +47,7 @@ let zero64 = `(module codegenTestX64_adhoc( zero64, 'f', - 'xor %rax, %rax', {no_prefix:true}); + 'xor %rax, %rax'); assertEq(wasmEvalText(zero64).exports.f(-37000000000n), 0n) assertEq(wasmEvalText(zero64).exports.f(42000000000n), 0n) @@ -74,7 +74,7 @@ codegenTestX64_adhoc( assertEq(wasmEvalText(one64).exports.f(-37000000000n), -37000000000n) assertEq(wasmEvalText(one64).exports.f(42000000000n), 42000000000n) -// Test that multiplication by two yields lea/add +// Test that multiplication by two yields lea let double32 = `(module @@ -93,11 +93,11 @@ let double64 = `(module codegenTestX64_adhoc( double64, 'f', - 'add %rax, %rax', {no_prefix:true}); + 'lea \\(%rdi,%rdi,1\\), %rax'); assertEq(wasmEvalText(double64).exports.f(-37000000000n), -74000000000n) assertEq(wasmEvalText(double64).exports.f(42000000000n), 84000000000n) -// Test that multiplication by four yields lea/shift +// Test that multiplication by four yields lea let quad32 = `(module @@ -116,11 +116,11 @@ let quad64 = `(module codegenTestX64_adhoc( quad64, 'f', - 'shl \\$0x02, %rax', {no_prefix:true}); + 'lea \\(,%rdi,4\\), %rax'); assertEq(wasmEvalText(quad64).exports.f(-37000000000n), -148000000000n) assertEq(wasmEvalText(quad64).exports.f(42000000000n), 168000000000n) -// Test that multiplication by five yields lea/imul +// Test that multiplication by five yields lea let quint32 = `(module @@ -139,10 +139,46 @@ let quint64 = `(module codegenTestX64_adhoc( quint64, 'f', - `imul \\$0x05, %rax, %rax`, {no_prefix:true}) + `lea \\(%rdi,%rdi,4\\), %rax`) assertEq(wasmEvalText(quint64).exports.f(-37000000000n), -37000000000n*5n) assertEq(wasmEvalText(quint64).exports.f(42000000000n), 42000000000n*5n) +// Test that multiplication by six yields imul + +let sext32 = + `(module + (func (export "f") (param i32) (result i32) + (i32.mul (local.get 0) (i32.const 6))))`; +codegenTestX64_adhoc( + sext32, + 'f', + 'imul \\$0x06, %edi, %eax'); +assertEq(wasmEvalText(sext32).exports.f(-37), -37*6) +assertEq(wasmEvalText(sext32).exports.f(42), 42*6) + +let sext64 = `(module + (func (export "f") (param i64) (result i64) + (i64.mul (local.get 0) (i64.const 6))))` +codegenTestX64_adhoc( + sext64, + 'f', + `imul \\$0x06, %rdi, %rax`) +assertEq(wasmEvalText(sext64).exports.f(-37000000000n), -37000000000n*6n) +assertEq(wasmEvalText(sext64).exports.f(42000000000n), 42000000000n*6n) + +// Test that multiplication by UINT32_MAX yields imul + +let uint32max64 = `(module + (func (export "f") (param i64) (result i64) + (i64.mul (local.get 0) (i64.const 0xffffffff))))` +codegenTestX64_adhoc( + uint32max64, + 'f', + `mov \\$-0x01, %r11d + imul %r11, %rax`, {no_prefix:true}) +assertEq(wasmEvalText(uint32max64).exports.f(-37000000000n), BigInt.asIntN(64, -37000000000n*0xffffffffn)) +assertEq(wasmEvalText(uint32max64).exports.f(42000000000n), BigInt.asIntN(64, 42000000000n*0xffffffffn)) + // Test that 0-n yields negation. let subneg32 = diff --git a/js/src/jit-test/tests/wasm/ion-adhoc-multiplatform.js b/js/src/jit-test/tests/wasm/ion-adhoc-multiplatform.js @@ -40,18 +40,8 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul64_zeroL") (param $p1 i64) (result i64) (i64.mul (i64.const 0) (local.get $p1))))`, "mul64_zeroL", - // FIXME folding happened, zero-creation insns could be improved - {x64: // First we move rdi to rax unnecessarily via rcx (bug 1752520), - // then we overwrite rax. Presumably because the folding - // 0 * x => 0 is done at the LIR level, not the MIR level, hence - // the now-pointless WasmParameter node is not DCE'd away, since - // DCE only happens at the MIR level. In fact all targets suffer - // from the latter problem, but on x86 no_prefix_x86:true - // hides it, and on arm32/64 the pointless move is correctly - // transformed by RA into a no-op. Also, on xor, REX.W is redundant. - `mov %rdi, %rcx - mov %rcx, %rax - xor %rax, %rax`, + // FIXME zero-creation insns could be improved + {x64: `xor %rax, %rax`, // REX.W is redundant x86: `xor %eax, %eax xor %edx, %edx`, arm64: `mov x0, xzr`, @@ -64,7 +54,14 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul32_oneL") (param $p1 i32) (result i32) (i32.mul (i32.const 1) (local.get $p1))))`, "mul32_oneL", - {x64: `mov %edi, %ecx + {x64: // We move edi to eax unnecessarily via ecx (bug 1752520). + // Presumably because the folding 1 * x => x is done at the LIR + // level, not the MIR level, hence the now-pointless WasmParameter + // node is not DCE'd away, since DCE only happens at the MIR level. + // In fact all targets suffer from the latter problem, but on x86 + // no_prefix_x86:true hides it, and on arm32/64 the pointless move + // is correctly transformed by RA into a no-op. + `mov %edi, %ecx mov %ecx, %eax`, x86: `movl 0x10\\(%rbp\\), %eax`, arm64: ``, @@ -125,9 +122,7 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul64_twoL") (param $p1 i64) (result i64) (i64.mul (i64.const 2) (local.get $p1))))`, "mul64_twoL", - {x64: `mov %rdi, %rcx - mov %rcx, %rax - add %rax, %rax`, + {x64: `lea \\(%rdi,%rdi,1\\), %rax`, x86: `movl 0x14\\(%rbp\\), %edx movl 0x10\\(%rbp\\), %eax add %eax, %eax @@ -153,9 +148,7 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul64_fourL") (param $p1 i64) (result i64) (i64.mul (i64.const 4) (local.get $p1))))`, "mul64_fourL", - {x64: `mov %rdi, %rcx - mov %rcx, %rax - shl \\$0x02, %rax`, + {x64: `lea \\(,%rdi,4\\), %rax`, x86: `movl 0x14\\(%rbp\\), %edx movl 0x10\\(%rbp\\), %eax shld \\$0x02, %eax, %edx @@ -189,9 +182,7 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul64_zeroR") (param $p1 i64) (result i64) (i64.mul (local.get $p1) (i64.const 0))))`, "mul64_zeroR", - {x64: `mov %rdi, %rcx - mov %rcx, %rax - xor %rax, %rax`, // REX.W is redundant + {x64: `xor %rax, %rax`, // REX.W is redundant x86: `xor %eax, %eax xor %edx, %edx`, arm64: `mov x0, xzr`, @@ -265,9 +256,7 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul64_twoR") (param $p1 i64) (result i64) (i64.mul (local.get $p1) (i64.const 2))))`, "mul64_twoR", - {x64: `mov %rdi, %rcx - mov %rcx, %rax - add %rax, %rax`, + {x64: `lea \\(%rdi,%rdi,1\\), %rax`, x86: `movl 0x14\\(%rbp\\), %edx movl 0x10\\(%rbp\\), %eax add %eax, %eax @@ -293,9 +282,7 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul64_fourR") (param $p1 i64) (result i64) (i64.mul (local.get $p1) (i64.const 4))))`, "mul64_fourR", - {x64: `mov %rdi, %rcx - mov %rcx, %rax - shl \\$0x02, %rax`, + {x64: `lea \\(,%rdi,4\\), %rax`, x86: `movl 0x14\\(%rbp\\), %edx movl 0x10\\(%rbp\\), %eax shld \\$0x02, %eax, %edx diff --git a/js/src/jit/LIROps.yaml b/js/src/jit/LIROps.yaml @@ -1351,7 +1351,7 @@ operands: lhs: Int64 rhs: Int64 -#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM) +#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_ARM) num_temps: 1 #endif defer_init: true diff --git a/js/src/jit/x64/CodeGenerator-x64.cpp b/js/src/jit/x64/CodeGenerator-x64.cpp @@ -123,41 +123,77 @@ void CodeGenerator::visitUnbox(LUnbox* unbox) { } void CodeGenerator::visitMulI64(LMulI64* lir) { - Register64 lhs = ToRegister64(lir->lhs()); + Register lhs = ToRegister64(lir->lhs()).reg; LInt64Allocation rhs = lir->rhs(); - - MOZ_ASSERT(ToOutRegister64(lir) == lhs); + Register out = ToOutRegister64(lir).reg; if (IsConstant(rhs)) { int64_t constant = ToInt64(rhs); switch (constant) { case -1: - masm.neg64(lhs); - return; + if (lhs != out) { + masm.movq(lhs, out); + } + masm.negq(out); + break; case 0: - masm.xor64(lhs, lhs); - return; + masm.xorq(out, out); + break; case 1: - // nop - return; + if (lhs != out) { + masm.movq(lhs, out); + } + break; case 2: - masm.add64(lhs, lhs); - return; - default: - if (constant > 0) { - // Use shift if constant is power of 2. - int32_t shift = mozilla::FloorLog2(constant); - if (int64_t(1) << shift == constant) { - masm.lshift64(Imm32(shift), lhs); - return; + if (lhs == out) { + masm.addq(lhs, lhs); + } else { + masm.lea(Operand(lhs, lhs, TimesOne), out); + } + break; + case 3: + masm.lea(Operand(lhs, lhs, TimesTwo), out); + break; + case 4: + if (lhs == out) { + masm.shlq(Imm32(2), lhs); + } else { + masm.lea(Operand(lhs, TimesFour, 0), out); + } + break; + case 5: + masm.lea(Operand(lhs, lhs, TimesFour), out); + break; + case 8: + if (lhs == out) { + masm.shlq(Imm32(3), lhs); + } else { + masm.lea(Operand(lhs, TimesEight, 0), out); + } + break; + case 9: + masm.lea(Operand(lhs, lhs, TimesEight), out); + break; + default: { + // Use shift if constant is power of 2. + int32_t shift = mozilla::FloorLog2(constant); + if (constant > 0 && (1 << shift) == constant) { + if (lhs != out) { + masm.movq(lhs, out); } + masm.shlq(Imm32(shift), out); + } else if (int32_t(constant) == constant) { + masm.imulq(Imm32(constant), lhs, out); + } else { + MOZ_ASSERT(out == lhs); + masm.mul64(Imm64(constant), Register64(lhs)); } - Register temp = ToTempRegisterOrInvalid(lir->temp0()); - masm.mul64(Imm64(constant), lhs, temp); + break; + } } } else { - Register temp = ToTempRegisterOrInvalid(lir->temp0()); - masm.mul64(ToOperandOrRegister64(rhs), lhs, temp); + MOZ_ASSERT(out == lhs); + masm.imulq(ToOperandOrRegister64(rhs), lhs); } } diff --git a/js/src/jit/x64/Lowering-x64.cpp b/js/src/jit/x64/Lowering-x64.cpp @@ -62,12 +62,22 @@ void LIRGeneratorX64::lowerForALUInt64( void LIRGeneratorX64::lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs) { - // X64 doesn't need a temp for 64bit multiplication. + // No input reuse needed when we can use imulq with an int32 immediate. + bool reuseInput = true; + if (rhs->isConstant()) { + int64_t constant = rhs->toConstant()->toInt64(); + reuseInput = int32_t(constant) != constant; + } + ins->setLhs(useInt64RegisterAtStart(lhs)); ins->setRhs(willHaveDifferentLIRNodes(lhs, rhs) ? useInt64OrConstant(rhs) : useInt64OrConstantAtStart(rhs)); - defineInt64ReuseInput(ins, mir, 0); + if (reuseInput) { + defineInt64ReuseInput(ins, mir, 0); + } else { + defineInt64(ins, mir); + } } void LIRGenerator::visitBox(MBox* box) {