commit 4d2d424e3c1e9fc5df7162cd85d413627998727b
parent 81f5a1f4eb72c479f7759641ee564ee4c1eee038
Author: Henri Sivonen <hsivonen@hsivonen.fi>
Date: Wed, 5 Nov 2025 10:25:53 +0000
Bug 1997859 - Accelerate ContainsMarkup in innerHTML setter with SIMD. r=smaug
Differential Revision: https://phabricator.services.mozilla.com/D271012
Diffstat:
4 files changed, 64 insertions(+), 0 deletions(-)
diff --git a/dom/base/FragmentOrElement.cpp b/dom/base/FragmentOrElement.cpp
@@ -73,6 +73,8 @@
#include "mozilla/dom/NodeListBinding.h"
#include "mozilla/dom/SVGUseElement.h"
#include "mozilla/dom/ShadowRoot.h"
+#include "mozilla/htmlaccel/htmlaccelEnabled.h"
+#include "mozilla/htmlaccel/htmlaccelNotInline.h"
#include "nsCCUncollectableMarker.h"
#include "nsChildContentList.h"
#include "nsContentCreatorFunctions.h"
@@ -1917,6 +1919,21 @@ static bool ContainsMarkup(const nsAString& aStr) {
const char16_t* start = aStr.BeginReading();
const char16_t* end = aStr.EndReading();
+ if (mozilla::htmlaccel::htmlaccelEnabled()) {
+ // We need to check for the empty string in order to
+ // dereference `start` for the '<' check. We might as well
+ // check that we have a full SIMD stride.
+ if (end - start >= 16) {
+ // Optimize the case where the input starts with a tag.
+ if (*start == u'<') {
+ return true;
+ }
+ // Curiously, this doesn't look like much of an optimization on Zen 3,
+ // but since it is an optimization on M3 Pro and Skylake, let's do this.
+ return mozilla::htmlaccel::ContainsMarkup(start, end);
+ }
+ }
+
while (start != end) {
char16_t c = *start;
if (c == char16_t('<') || c == char16_t('&') || c == char16_t('\r') ||
diff --git a/parser/htmlaccel/htmlaccel.h b/parser/htmlaccel/htmlaccel.h
@@ -313,6 +313,35 @@ MOZ_ALWAYS_INLINE_EVEN_DEBUG int32_t AccelerateTextNode(const char16_t* aInput,
return int32_t(current - aInput);
}
+MOZ_ALWAYS_INLINE_EVEN_DEBUG bool ContainsMarkup(const char16_t* aInput,
+ const char16_t* aEnd) {
+ const char16_t* current = aInput;
+ while (aEnd - current >= 16) {
+ uint8x16_t mask = StrideToMask(current, ZERO_LT_AMP_CR, true);
+#if defined(__aarch64__)
+ uint8_t max = vmaxvq_u8(mask);
+ if (max != 0) {
+ return true;
+ }
+#else // x86/x86_64
+ int int_mask = _mm_movemask_epi8(mask);
+ if (int_mask != 0) {
+ return true;
+ }
+#endif
+ current += 16;
+ }
+ while (current != aEnd) {
+ char16_t c = *current;
+ if (c == char16_t('<') || c == char16_t('&') || c == char16_t('\r') ||
+ c == char16_t('\0')) {
+ return true;
+ }
+ ++current;
+ }
+ return false;
+}
+
} // namespace detail
// Public entry points are in htmlaccelNotInline.h for now.
diff --git a/parser/htmlaccel/htmlaccelNotInline.cpp b/parser/htmlaccel/htmlaccelNotInline.cpp
@@ -7,6 +7,15 @@
namespace mozilla::htmlaccel {
+// TODO: Perhaps inlining this one on aarch64 wouldn't run into the
+// LLVM LICM vs. regalloc bug. But then, inlining this would only
+// avoid the overhead of one function call and wouldn't reuse the
+// SIMD constants in a useful way.
+MOZ_NEVER_INLINE bool ContainsMarkup(const char16_t* aPtr,
+ const char16_t* aEnd) {
+ return detail::ContainsMarkup(aPtr, aEnd);
+}
+
/// The innerHTML / DOMParser case for the data state in the HTML parser
MOZ_NEVER_INLINE int32_t AccelerateDataFastest(const char16_t* aPtr,
const char16_t* aEnd) {
diff --git a/parser/htmlaccel/htmlaccelNotInline.h b/parser/htmlaccel/htmlaccelNotInline.h
@@ -8,6 +8,15 @@
#include "mozilla/Attributes.h"
namespace mozilla::htmlaccel {
+// True iff the buffer contains less-than, ampersand, carriage return,
+// or U+0000.
+//
+// This one could probably be inline without LLVM issues when SIMD
+// acceleration is statically enabled, but it's probably not worth
+// the complexity to do that.
+MOZ_NEVER_INLINE bool ContainsMarkup(const char16_t* aPtr,
+ const char16_t* aEnd);
+
// Logically these should be MOZ_ALWAYS_INLINE_EVEN_DEBUG if LLVM was working
// as expected. However, these are MOZ_NEVER_INLINE to work around
// https://github.com/llvm/llvm-project/issues/160886 . This way, we get