[ tor-browser ].git.dasho

commit 4a6ce3e80e0e6f3b65ab63820616c6bd06a48e02
parent 585b1029673b6913762c9958a2f048272a145728
Author: André Bargull <andre.bargull@gmail.com>
Date:   Mon, 27 Oct 2025 15:22:12 +0000

Bug 1996343: Use AVX encoding for unary lowerForFPU. r=spidermonkey-reviewers,iain

Codegen for:
```js
function f(x) {
  return -x + x;
}
```

Was:
```asm
[Codegen]                                 # LIR=Double
[Codegen] movsd      .Lfrom42(%rip), %xmm1
[Codegen]                                 # LIR=MoveGroup
[Codegen] vmovapd    %xmm0, %xmm1
[Codegen]                                 # LIR=NegD
[Codegen] pcmpeqw    %xmm15, %xmm15
[Codegen] psllq      $63, %xmm15
[Codegen] xorpd      %xmm15, %xmm0
[Codegen]                                 # LIR=MathD:Add
[Codegen] addsd      %xmm1, %xmm0
```

And is now:
```asm
[Codegen]                                 # LIR=Double
[Codegen] movsd      .Lfrom42(%rip), %xmm1
[Codegen]                                 # LIR=NegD
[Codegen] movsd      .Lfrom51(%rip), %xmm15
[Codegen] vxorpd     %xmm15, %xmm0, %xmm1
[Codegen]                                 # LIR=MathD:Add
[Codegen] vaddsd     %xmm0, %xmm1, %xmm0
```

Negative zero constants are now loaded from the constant pool using
`loadConstant{Double,Float32}`. (The `pcmpeqw + psllq` sequence is probably a
leftover from before bug 876064 was implemented.)

And for `LAbs{D,F}`:
```js
function f(x) {
  return Math.abs(x) + x;
}
```

was compiled to:
```asm
[Codegen]                                 # LIR=MoveGroup
[Codegen] vmovapd    %xmm0, %xmm1
[Codegen]                                 # LIR=AbsD
[Codegen] movsd      .Lfrom47(%rip), %xmm15
[Codegen] andpd      %xmm15, %xmm0
[Codegen]                                 # LIR=MathD:Add
[Codegen] addsd      %xmm1, %xmm0
```

And now it's:
```asm
[Codegen]                                 # LIR=AbsD
[Codegen] movsd      .Lfrom43(%rip), %xmm15
[Codegen] vandpd     %xmm15, %xmm0, %xmm1
[Codegen]                                 # LIR=MathD:Add
[Codegen] vaddsd     %xmm0, %xmm1, %xmm0
```

Differential Revision: https://phabricator.services.mozilla.com/D270017

Diffstat:
M js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp  | 16 ++++++++++++----
M js/src/jit/x86-shared/Lowering-x86-shared.cpp  | 11 +++++++++--
M js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h  | 7 ++-----

3 files changed, 23 insertions(+), 11 deletions(-)
diff --git a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
@@ -1868,16 +1868,24 @@ void CodeGenerator::visitNegI64(LNegI64* ins) {
 
 void CodeGenerator::visitNegD(LNegD* ins) {
   FloatRegister input = ToFloatRegister(ins->input());
-  MOZ_ASSERT(input == ToFloatRegister(ins->output()));
+  FloatRegister output = ToFloatRegister(ins->output());
+
+  ScratchDoubleScope scratch(masm);
+  masm.loadConstantDouble(-0.0, scratch);
 
-  masm.negateDouble(input);
+  // XOR the double with -0.0 to flip only its sign bit.
+  masm.vxorpd(scratch, input, output);  // s ^ 0x8000000000000000
 }
 
 void CodeGenerator::visitNegF(LNegF* ins) {
   FloatRegister input = ToFloatRegister(ins->input());
-  MOZ_ASSERT(input == ToFloatRegister(ins->output()));
+  FloatRegister output = ToFloatRegister(ins->output());
+
+  ScratchFloat32Scope scratch(masm);
+  masm.loadConstantFloat32(-0.0f, scratch);
 
-  masm.negateFloat(input);
+  // XOR the float32 with -0.0f to flip only its sign bit.
+  masm.vxorps(scratch, input, output);  // s ^ 0x80000000
 }
 
 void CodeGenerator::visitCompareExchangeTypedArrayElement(
diff --git a/js/src/jit/x86-shared/Lowering-x86-shared.cpp b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
@@ -153,8 +153,15 @@ void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins,
 
 void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 1, 0>* ins,
                                         MDefinition* mir, MDefinition* input) {
-  ins->setOperand(0, useRegisterAtStart(input));
-  defineReuseInput(ins, mir, 0);
+  // Without AVX, only the legacy two-operand SSE encodings are available, so
+  // the output must be allocated to the same register as the input.
+  if (!Assembler::HasAVX()) {
+    ins->setOperand(0, useRegisterAtStart(input));
+    defineReuseInput(ins, mir, 0);
+  } else {
+    ins->setOperand(0, useRegisterAtStart(input));
+    define(ins, mir);
+  }
 }
 
 void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 0>* ins,
diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h
@@ -293,18 +293,15 @@ void MacroAssembler::neg32(Register reg) { negl(reg); }
 
 void MacroAssembler::negateFloat(FloatRegister reg) {
   ScratchFloat32Scope scratch(*this);
-  vpcmpeqw(Operand(scratch), scratch, scratch);
-  vpsllq(Imm32(31), scratch, scratch);
+  loadConstantFloat32(-0.0f, scratch);
 
   // XOR the float in a float register with -0.0.
   vxorps(scratch, reg, reg);  // s ^ 0x80000000
 }
 
 void MacroAssembler::negateDouble(FloatRegister reg) {
-  // From MacroAssemblerX86Shared::maybeInlineDouble
   ScratchDoubleScope scratch(*this);
-  vpcmpeqw(Operand(scratch), scratch, scratch);
-  vpsllq(Imm32(63), scratch, scratch);
+  loadConstantDouble(-0.0, scratch);
 
   // XOR the float in a float register with -0.0.
   vxorpd(scratch, reg, reg);  // s ^ 0x80000000000000

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE

M	js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp	\|	16	++++++++++++----
M	js/src/jit/x86-shared/Lowering-x86-shared.cpp	\|	11	+++++++++--
M	js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h	\|	7	++-----