tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 4a6ce3e80e0e6f3b65ab63820616c6bd06a48e02
parent 585b1029673b6913762c9958a2f048272a145728
Author: André Bargull <andre.bargull@gmail.com>
Date:   Mon, 27 Oct 2025 15:22:12 +0000

Bug 1996343: Use AVX encoding for unary lowerForFPU. r=spidermonkey-reviewers,iain

Codegen for:
```js
function f(x) {
  return -x + x;
}
```

Was:
```asm
[Codegen]                                 # LIR=Double
[Codegen] movsd      .Lfrom42(%rip), %xmm1
[Codegen]                                 # LIR=MoveGroup
[Codegen] vmovapd    %xmm0, %xmm1
[Codegen]                                 # LIR=NegD
[Codegen] pcmpeqw    %xmm15, %xmm15
[Codegen] psllq      $63, %xmm15
[Codegen] xorpd      %xmm15, %xmm0
[Codegen]                                 # LIR=MathD:Add
[Codegen] addsd      %xmm1, %xmm0
```

And is now:
```asm
[Codegen]                                 # LIR=Double
[Codegen] movsd      .Lfrom42(%rip), %xmm1
[Codegen]                                 # LIR=NegD
[Codegen] movsd      .Lfrom51(%rip), %xmm15
[Codegen] vxorpd     %xmm15, %xmm0, %xmm1
[Codegen]                                 # LIR=MathD:Add
[Codegen] vaddsd     %xmm0, %xmm1, %xmm0
```

Negative zero constants are now loaded from the constant pool using
`loadConstant{Double,Float32}`. (The `pcmpeqw + psllq` sequence is probably a
leftover from before bug 876064 was implemented.)

And for `LAbs{D,F}`:
```js
function f(x) {
  return Math.abs(x) + x;
}
```

Was compiled to:
```asm
[Codegen]                                 # LIR=MoveGroup
[Codegen] vmovapd    %xmm0, %xmm1
[Codegen]                                 # LIR=AbsD
[Codegen] movsd      .Lfrom47(%rip), %xmm15
[Codegen] andpd      %xmm15, %xmm0
[Codegen]                                 # LIR=MathD:Add
[Codegen] addsd      %xmm1, %xmm0
```

And now it's:
```asm
[Codegen]                                 # LIR=AbsD
[Codegen] movsd      .Lfrom43(%rip), %xmm15
[Codegen] vandpd     %xmm15, %xmm0, %xmm1
[Codegen]                                 # LIR=MathD:Add
[Codegen] vaddsd     %xmm0, %xmm1, %xmm0
```

Differential Revision: https://phabricator.services.mozilla.com/D270017

Diffstat:
M js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp | 16 ++++++++++++----
M js/src/jit/x86-shared/Lowering-x86-shared.cpp | 11 +++++++++--
M js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h | 7 ++-----
3 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp @@ -1868,16 +1868,24 @@ void CodeGenerator::visitNegI64(LNegI64* ins) { void CodeGenerator::visitNegD(LNegD* ins) { FloatRegister input = ToFloatRegister(ins->input()); - MOZ_ASSERT(input == ToFloatRegister(ins->output())); + FloatRegister output = ToFloatRegister(ins->output()); + + ScratchDoubleScope scratch(masm); + masm.loadConstantDouble(-0.0, scratch); - masm.negateDouble(input); + // XOR the float in a float register with -0.0. + masm.vxorpd(scratch, input, output); // s ^ 0x80000000000000 } void CodeGenerator::visitNegF(LNegF* ins) { FloatRegister input = ToFloatRegister(ins->input()); - MOZ_ASSERT(input == ToFloatRegister(ins->output())); + FloatRegister output = ToFloatRegister(ins->output()); + + ScratchDoubleScope scratch(masm); + masm.loadConstantFloat32(-0.0f, scratch); - masm.negateFloat(input); + // XOR the float in a float register with -0.0. + masm.vxorpd(scratch, input, output); // s ^ 0x80000000000000 } void CodeGenerator::visitCompareExchangeTypedArrayElement( diff --git a/js/src/jit/x86-shared/Lowering-x86-shared.cpp b/js/src/jit/x86-shared/Lowering-x86-shared.cpp @@ -153,8 +153,15 @@ void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins, void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir, MDefinition* input) { - ins->setOperand(0, useRegisterAtStart(input)); - defineReuseInput(ins, mir, 0); + // Without AVX, we'll need to use the x86 encodings where the input must be + // the same location as the output. 
+ if (!Assembler::HasAVX()) { + ins->setOperand(0, useRegisterAtStart(input)); + defineReuseInput(ins, mir, 0); + } else { + ins->setOperand(0, useRegisterAtStart(input)); + define(ins, mir); + } } void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 0>* ins, diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h @@ -293,18 +293,15 @@ void MacroAssembler::neg32(Register reg) { negl(reg); } void MacroAssembler::negateFloat(FloatRegister reg) { ScratchFloat32Scope scratch(*this); - vpcmpeqw(Operand(scratch), scratch, scratch); - vpsllq(Imm32(31), scratch, scratch); + loadConstantFloat32(-0.0f, scratch); // XOR the float in a float register with -0.0. vxorps(scratch, reg, reg); // s ^ 0x80000000 } void MacroAssembler::negateDouble(FloatRegister reg) { - // From MacroAssemblerX86Shared::maybeInlineDouble ScratchDoubleScope scratch(*this); - vpcmpeqw(Operand(scratch), scratch, scratch); - vpsllq(Imm32(63), scratch, scratch); + loadConstantDouble(-0.0, scratch); // XOR the float in a float register with -0.0. vxorpd(scratch, reg, reg); // s ^ 0x80000000000000