[ tor-browser ].git.dasho

commit 4d2d424e3c1e9fc5df7162cd85d413627998727b
parent 81f5a1f4eb72c479f7759641ee564ee4c1eee038
Author: Henri Sivonen <hsivonen@hsivonen.fi>
Date:   Wed,  5 Nov 2025 10:25:53 +0000

Bug 1997859 - Accelerate ContainsMarkup in innerHTML setter with SIMD. r=smaug

Differential Revision: https://phabricator.services.mozilla.com/D271012

Diffstat:
M dom/base/FragmentOrElement.cpp  | 17 +++++++++++++++++
M parser/htmlaccel/htmlaccel.h  | 29 +++++++++++++++++++++++++++++
M parser/htmlaccel/htmlaccelNotInline.cpp  | 9 +++++++++
M parser/htmlaccel/htmlaccelNotInline.h  | 9 +++++++++

4 files changed, 64 insertions(+), 0 deletions(-)
diff --git a/dom/base/FragmentOrElement.cpp b/dom/base/FragmentOrElement.cpp
@@ -73,6 +73,8 @@
 #include "mozilla/dom/NodeListBinding.h"
 #include "mozilla/dom/SVGUseElement.h"
 #include "mozilla/dom/ShadowRoot.h"
+#include "mozilla/htmlaccel/htmlaccelEnabled.h"
+#include "mozilla/htmlaccel/htmlaccelNotInline.h"
 #include "nsCCUncollectableMarker.h"
 #include "nsChildContentList.h"
 #include "nsContentCreatorFunctions.h"
@@ -1917,6 +1919,21 @@ static bool ContainsMarkup(const nsAString& aStr) {
   const char16_t* start = aStr.BeginReading();
   const char16_t* end = aStr.EndReading();
 
+  if (mozilla::htmlaccel::htmlaccelEnabled()) {
+    // We need to check for the empty string in order to
+    // dereference `start` for the '<' check. We might as well
+    // check that we have a full SIMD stride.
+    if (end - start >= 16) {
+      // Optimize the case where the input starts with a tag.
+      if (*start == u'<') {
+        return true;
+      }
+      // Curiously, this doesn't look like much of an optimization on Zen 3,
+      // but since it is an optimization on M3 Pro and Skylake, let's do this.
+      return mozilla::htmlaccel::ContainsMarkup(start, end);
+    }
+  }
+
   while (start != end) {
     char16_t c = *start;
     if (c == char16_t('<') || c == char16_t('&') || c == char16_t('\r') ||
diff --git a/parser/htmlaccel/htmlaccel.h b/parser/htmlaccel/htmlaccel.h
@@ -313,6 +313,35 @@ MOZ_ALWAYS_INLINE_EVEN_DEBUG int32_t AccelerateTextNode(const char16_t* aInput,
   return int32_t(current - aInput);
 }
 
+MOZ_ALWAYS_INLINE_EVEN_DEBUG bool ContainsMarkup(const char16_t* aInput,
+                                                 const char16_t* aEnd) {
+  const char16_t* current = aInput;
+  while (aEnd - current >= 16) {
+    uint8x16_t mask = StrideToMask(current, ZERO_LT_AMP_CR, true);
+#if defined(__aarch64__)
+    uint8_t max = vmaxvq_u8(mask);
+    if (max != 0) {
+      return true;
+    }
+#else  // x86/x86_64
+    int int_mask = _mm_movemask_epi8(mask);
+    if (int_mask != 0) {
+      return true;
+    }
+#endif
+    current += 16;
+  }
+  while (current != aEnd) {
+    char16_t c = *current;
+    if (c == char16_t('<') || c == char16_t('&') || c == char16_t('\r') ||
+        c == char16_t('\0')) {
+      return true;
+    }
+    ++current;
+  }
+  return false;
+}
+
 }  // namespace detail
 
 // Public entry points are in htmlaccelNotInline.h for now.
diff --git a/parser/htmlaccel/htmlaccelNotInline.cpp b/parser/htmlaccel/htmlaccelNotInline.cpp
@@ -7,6 +7,15 @@
 
 namespace mozilla::htmlaccel {
 
+// TODO: Perhaps inlining this one on aarch64 wouldn't run into the
+// LLVM LICM vs. regalloc bug. But then, inlining this would only
+// avoid the overhead of one function call and wouldn't reuse the
+// SIMD constants in a useful way.
+MOZ_NEVER_INLINE bool ContainsMarkup(const char16_t* aPtr,
+                                     const char16_t* aEnd) {
+  return detail::ContainsMarkup(aPtr, aEnd);
+}
+
 /// The innerHTML / DOMParser case for the data state in the HTML parser
 MOZ_NEVER_INLINE int32_t AccelerateDataFastest(const char16_t* aPtr,
                                                const char16_t* aEnd) {
diff --git a/parser/htmlaccel/htmlaccelNotInline.h b/parser/htmlaccel/htmlaccelNotInline.h
@@ -8,6 +8,15 @@
 #include "mozilla/Attributes.h"
 
 namespace mozilla::htmlaccel {
+// True iff the buffer contains less-than, ampersand, carriage return,
+// or U+0000.
+//
+// This one could probably be inline without LLVM issues when SIMD
+// acceleration is statically enabled, but it's probably not worth
+// the complexity to do that.
+MOZ_NEVER_INLINE bool ContainsMarkup(const char16_t* aPtr,
+                                     const char16_t* aEnd);
+
 // Logically these should be MOZ_ALWAYS_INLINE_EVEN_DEBUG if LLVM was working
 // as expected. However, these are MOZ_NEVER_INLINE to work around
 // https://github.com/llvm/llvm-project/issues/160886 . This way, we get

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE

M	dom/base/FragmentOrElement.cpp	\|	17	+++++++++++++++++
M	parser/htmlaccel/htmlaccel.h	\|	29	+++++++++++++++++++++++++++++
M	parser/htmlaccel/htmlaccelNotInline.cpp	\|	9	+++++++++
M	parser/htmlaccel/htmlaccelNotInline.h	\|	9	+++++++++