commit 4a6ce3e80e0e6f3b65ab63820616c6bd06a48e02
parent 585b1029673b6913762c9958a2f048272a145728
Author: André Bargull <andre.bargull@gmail.com>
Date: Mon, 27 Oct 2025 15:22:12 +0000
Bug 1996343: Use AVX encoding for unary lowerForFPU. r=spidermonkey-reviewers,iain
Codegen for:
```js
function f(x) {
return -x + x;
}
```
Was:
```asm
[Codegen] # LIR=Double
[Codegen] movsd .Lfrom42(%rip), %xmm1
[Codegen] # LIR=MoveGroup
[Codegen] vmovapd %xmm0, %xmm1
[Codegen] # LIR=NegD
[Codegen] pcmpeqw %xmm15, %xmm15
[Codegen] psllq $63, %xmm15
[Codegen] xorpd %xmm15, %xmm0
[Codegen] # LIR=MathD:Add
[Codegen] addsd %xmm1, %xmm0
```
And is now:
```asm
[Codegen] # LIR=Double
[Codegen] movsd .Lfrom42(%rip), %xmm1
[Codegen] # LIR=NegD
[Codegen] movsd .Lfrom51(%rip), %xmm15
[Codegen] vxorpd %xmm15, %xmm0, %xmm1
[Codegen] # LIR=MathD:Add
[Codegen] vaddsd %xmm0, %xmm1, %xmm0
```
Negative zero constants are now loaded using `loadConstant{Double,Float}` from
the constant pool. (The `pcmpeqw + psllq` sequence is probably a leftover
from before bug 876064 was implemented).
And for `LAbs{D,F}`:
```js
function f(x) {
return Math.abs(x) + x;
}
```
was compiled to:
```asm
[Codegen] # LIR=MoveGroup
[Codegen] vmovapd %xmm0, %xmm1
[Codegen] # LIR=AbsD
[Codegen] movsd .Lfrom47(%rip), %xmm15
[Codegen] andpd %xmm15, %xmm0
[Codegen] # LIR=MathD:Add
[Codegen] addsd %xmm1, %xmm0
```
And now it's:
```asm
[Codegen] # LIR=AbsD
[Codegen] movsd .Lfrom43(%rip), %xmm15
[Codegen] vandpd %xmm15, %xmm0, %xmm1
[Codegen] # LIR=MathD:Add
[Codegen] vaddsd %xmm0, %xmm1, %xmm0
```
Differential Revision: https://phabricator.services.mozilla.com/D270017
Diffstat:
3 files changed, 23 insertions(+), 11 deletions(-)
diff --git a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
@@ -1868,16 +1868,24 @@ void CodeGenerator::visitNegI64(LNegI64* ins) {
void CodeGenerator::visitNegD(LNegD* ins) {
FloatRegister input = ToFloatRegister(ins->input());
- MOZ_ASSERT(input == ToFloatRegister(ins->output()));
+ FloatRegister output = ToFloatRegister(ins->output());
+
+ ScratchDoubleScope scratch(masm);
+ masm.loadConstantDouble(-0.0, scratch); // -0.0 has only the sign bit set
- masm.negateDouble(input);
+ // Negate by XOR-ing the double in |input| with -0.0, which flips its sign bit.
+ masm.vxorpd(scratch, input, output); // s ^ 0x8000000000000000
}
void CodeGenerator::visitNegF(LNegF* ins) {
FloatRegister input = ToFloatRegister(ins->input());
- MOZ_ASSERT(input == ToFloatRegister(ins->output()));
+ FloatRegister output = ToFloatRegister(ins->output());
+
+ ScratchFloat32Scope scratch(masm);
+ masm.loadConstantFloat32(-0.0f, scratch); // -0.0f has only the sign bit set
- masm.negateFloat(input);
+ // Negate by XOR-ing the float32 in |input| with -0.0f, which flips its sign bit.
+ masm.vxorps(scratch, input, output); // s ^ 0x80000000
}
void CodeGenerator::visitCompareExchangeTypedArrayElement(
diff --git a/js/src/jit/x86-shared/Lowering-x86-shared.cpp b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
@@ -153,8 +153,15 @@ void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins,
void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 1, 0>* ins,
MDefinition* mir, MDefinition* input) {
- ins->setOperand(0, useRegisterAtStart(input));
- defineReuseInput(ins, mir, 0);
+ // Without AVX, we must use the x86 encodings where the output is the same
+ // location as the input, so reuse the input; with AVX the output may differ.
+ if (!Assembler::HasAVX()) {
+ ins->setOperand(0, useRegisterAtStart(input));
+ defineReuseInput(ins, mir, 0);
+ } else {
+ ins->setOperand(0, useRegisterAtStart(input));
+ define(ins, mir);
+ }
}
void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 0>* ins,
diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h
@@ -293,18 +293,15 @@ void MacroAssembler::neg32(Register reg) { negl(reg); }
void MacroAssembler::negateFloat(FloatRegister reg) {
ScratchFloat32Scope scratch(*this);
- vpcmpeqw(Operand(scratch), scratch, scratch);
- vpsllq(Imm32(31), scratch, scratch);
+ loadConstantFloat32(-0.0f, scratch); // sign-bit mask for the XOR below
// XOR the float in a float register with -0.0.
vxorps(scratch, reg, reg); // s ^ 0x80000000
}
void MacroAssembler::negateDouble(FloatRegister reg) {
- // From MacroAssemblerX86Shared::maybeInlineDouble
ScratchDoubleScope scratch(*this);
- vpcmpeqw(Operand(scratch), scratch, scratch);
- vpsllq(Imm32(63), scratch, scratch);
+ loadConstantDouble(-0.0, scratch);
// XOR the float in a float register with -0.0.
vxorpd(scratch, reg, reg); // s ^ 0x80000000000000