[ tor-browser ].git.dasho

commit c47d8476b9d3c4c8876005a24977498820610119
parent 9092fb37353a5c2ebad147352cd096bb877565c9
Author: André Bargull <andre.bargull@gmail.com>
Date:   Wed, 29 Oct 2025 10:22:07 +0000

Bug 1996522 - Part 2: Avoid loading floating point constants in registers for abs, neg, and copysign. r=iain

Directly load from memory to avoid an unnecessary move through the scratch
register.

For `copySign{Double,Float32}` only use memory operands when AVX is present to
keep the implementation a bit simpler. (The majority of users should have a CPU
which has AVX support.)

Differential Revision: https://phabricator.services.mozilla.com/D270126

Diffstat:
M js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp  | 10 ++--------
M js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h  | 25 +++++++------------------
M js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp  | 80 ++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------

3 files changed, 58 insertions(+), 57 deletions(-)
diff --git a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
@@ -1815,22 +1815,16 @@ void CodeGenerator::visitNegD(LNegD* ins) {
   FloatRegister input = ToFloatRegister(ins->input());
   FloatRegister output = ToFloatRegister(ins->output());
 
-  ScratchDoubleScope scratch(masm);
-  masm.loadConstantDouble(-0.0, scratch);
-
   // XOR the float in a float register with -0.0.
-  masm.vxorpd(scratch, input, output);  // s ^ 0x80000000000000
+  masm.vxorpdSimd128(SimdConstant::SplatX2(-0.0), input, output);
 }
 
 void CodeGenerator::visitNegF(LNegF* ins) {
   FloatRegister input = ToFloatRegister(ins->input());
   FloatRegister output = ToFloatRegister(ins->output());
 
-  ScratchDoubleScope scratch(masm);
-  masm.loadConstantFloat32(-0.0f, scratch);
-
   // XOR the float in a float register with -0.0.
-  masm.vxorpd(scratch, input, output);  // s ^ 0x80000000000000
+  masm.vxorpsSimd128(SimdConstant::SplatX4(-0.0f), input, output);
 }
 
 void CodeGenerator::visitCompareExchangeTypedArrayElement(
diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h
@@ -9,6 +9,7 @@
 
 #include "jit/x86-shared/MacroAssembler-x86-shared.h"
 
+#include "mozilla/Casting.h"
 #include "mozilla/MathAlgorithms.h"
 
 namespace js {
@@ -292,19 +293,13 @@ void MacroAssembler::divDouble(FloatRegister src, FloatRegister dest) {
 void MacroAssembler::neg32(Register reg) { negl(reg); }
 
 void MacroAssembler::negateFloat(FloatRegister reg) {
-  ScratchFloat32Scope scratch(*this);
-  loadConstantFloat32(-0.0f, scratch);
-
   // XOR the float in a float register with -0.0.
-  vxorps(scratch, reg, reg);  // s ^ 0x80000000
+  vxorpsSimd128(SimdConstant::SplatX4(-0.0f), reg, reg);
 }
 
 void MacroAssembler::negateDouble(FloatRegister reg) {
-  ScratchDoubleScope scratch(*this);
-  loadConstantDouble(-0.0, scratch);
-
   // XOR the float in a float register with -0.0.
-  vxorpd(scratch, reg, reg);  // s ^ 0x80000000000000
+  vxorpdSimd128(SimdConstant::SplatX2(-0.0), reg, reg);
 }
 
 void MacroAssembler::abs32(Register src, Register dest) {
@@ -318,19 +313,13 @@ void MacroAssembler::abs32(Register src, Register dest) {
 }
 
 void MacroAssembler::absFloat32(FloatRegister src, FloatRegister dest) {
-  ScratchFloat32Scope scratch(*this);
-  loadConstantFloat32(mozilla::SpecificNaN<float>(
-                          0, mozilla::FloatingPoint<float>::kSignificandBits),
-                      scratch);
-  vandps(scratch, src, dest);
+  float clearSignMask = mozilla::BitwiseCast<float>(INT32_MAX);
+  vandpsSimd128(SimdConstant::SplatX4(clearSignMask), src, dest);
 }
 
 void MacroAssembler::absDouble(FloatRegister src, FloatRegister dest) {
-  ScratchDoubleScope scratch(*this);
-  loadConstantDouble(mozilla::SpecificNaN<double>(
-                         0, mozilla::FloatingPoint<double>::kSignificandBits),
-                     scratch);
-  vandpd(scratch, src, dest);
+  double clearSignMask = mozilla::BitwiseCast<double>(INT64_MAX);
+  vandpdSimd128(SimdConstant::SplatX2(clearSignMask), src, dest);
 }
 
 void MacroAssembler::sqrtFloat32(FloatRegister src, FloatRegister dest) {
diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp b/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp
@@ -2228,24 +2228,33 @@ void MacroAssembler::copySignDouble(FloatRegister lhs, FloatRegister rhs,
                                     FloatRegister output) {
   ScratchDoubleScope scratch(*this);
 
-  // TODO Support AVX2
-  if (rhs == output) {
-    MOZ_ASSERT(lhs != rhs);
-    double keepSignMask = mozilla::BitwiseCast<double>(INT64_MIN);
-    loadConstantDouble(keepSignMask, scratch);
-    vandpd(scratch, rhs, output);
-
-    double clearSignMask = mozilla::BitwiseCast<double>(INT64_MAX);
-    loadConstantDouble(clearSignMask, scratch);
-    vandpd(lhs, scratch, scratch);
+  double keepSignMask = mozilla::BitwiseCast<double>(INT64_MIN);
+  double clearSignMask = mozilla::BitwiseCast<double>(INT64_MAX);
+
+  if (HasAVX()) {
+    if (rhs == output) {
+      MOZ_ASSERT(lhs != rhs);
+      vandpdSimd128(SimdConstant::SplatX2(keepSignMask), rhs, output);
+      vandpdSimd128(SimdConstant::SplatX2(clearSignMask), lhs, scratch);
+    } else {
+      vandpdSimd128(SimdConstant::SplatX2(clearSignMask), lhs, output);
+      vandpdSimd128(SimdConstant::SplatX2(keepSignMask), rhs, scratch);
+    }
   } else {
-    double clearSignMask = mozilla::BitwiseCast<double>(INT64_MAX);
-    loadConstantDouble(clearSignMask, scratch);
-    vandpd(scratch, lhs, output);
+    if (rhs == output) {
+      MOZ_ASSERT(lhs != rhs);
+      loadConstantDouble(keepSignMask, scratch);
+      vandpd(scratch, rhs, output);
 
-    double keepSignMask = mozilla::BitwiseCast<double>(INT64_MIN);
-    loadConstantDouble(keepSignMask, scratch);
-    vandpd(rhs, scratch, scratch);
+      loadConstantDouble(clearSignMask, scratch);
+      vandpd(lhs, scratch, scratch);
+    } else {
+      loadConstantDouble(clearSignMask, scratch);
+      vandpd(scratch, lhs, output);
+
+      loadConstantDouble(keepSignMask, scratch);
+      vandpd(rhs, scratch, scratch);
+    }
   }
 
   vorpd(scratch, output, output);
@@ -2255,24 +2264,33 @@ void MacroAssembler::copySignFloat32(FloatRegister lhs, FloatRegister rhs,
                                      FloatRegister output) {
   ScratchFloat32Scope scratch(*this);
 
-  // TODO Support AVX2
-  if (rhs == output) {
-    MOZ_ASSERT(lhs != rhs);
-    float keepSignMask = mozilla::BitwiseCast<float>(INT32_MIN);
-    loadConstantFloat32(keepSignMask, scratch);
-    vandps(scratch, output, output);
+  float keepSignMask = mozilla::BitwiseCast<float>(INT32_MIN);
+  float clearSignMask = mozilla::BitwiseCast<float>(INT32_MAX);
 
-    float clearSignMask = mozilla::BitwiseCast<float>(INT32_MAX);
-    loadConstantFloat32(clearSignMask, scratch);
-    vandps(lhs, scratch, scratch);
+  if (HasAVX()) {
+    if (rhs == output) {
+      MOZ_ASSERT(lhs != rhs);
+      vandpsSimd128(SimdConstant::SplatX4(keepSignMask), rhs, output);
+      vandpsSimd128(SimdConstant::SplatX4(clearSignMask), lhs, scratch);
+    } else {
+      vandpsSimd128(SimdConstant::SplatX4(clearSignMask), lhs, output);
+      vandpsSimd128(SimdConstant::SplatX4(keepSignMask), rhs, scratch);
+    }
   } else {
-    float clearSignMask = mozilla::BitwiseCast<float>(INT32_MAX);
-    loadConstantFloat32(clearSignMask, scratch);
-    vandps(scratch, lhs, output);
+    if (rhs == output) {
+      MOZ_ASSERT(lhs != rhs);
+      loadConstantFloat32(keepSignMask, scratch);
+      vandps(scratch, output, output);
 
-    float keepSignMask = mozilla::BitwiseCast<float>(INT32_MIN);
-    loadConstantFloat32(keepSignMask, scratch);
-    vandps(rhs, scratch, scratch);
+      loadConstantFloat32(clearSignMask, scratch);
+      vandps(lhs, scratch, scratch);
+    } else {
+      loadConstantFloat32(clearSignMask, scratch);
+      vandps(scratch, lhs, output);
+
+      loadConstantFloat32(keepSignMask, scratch);
+      vandps(rhs, scratch, scratch);
+    }
   }
 
   vorps(scratch, output, output);

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log | Files | Refs | README | LICENSE

M	js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp	\|	10	++--------
M	js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h	\|	25	+++++++------------------
M	js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp	\|	80	++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------