commit c553709e5cfb1b97c3e3def9fcf2f58e1b2bcfe0
parent 7b372211587fd5c7f90f369245de84b68eb6109a
Author: Henri Sivonen <hsivonen@hsivonen.fi>
Date: Thu, 6 Nov 2025 08:35:14 +0000
Bug 1499684 - Accelerate text node and attribute value serialization using SIMD. r=smaug
Differential Revision: https://phabricator.services.mozilla.com/D271356
Diffstat:
5 files changed, 336 insertions(+), 75 deletions(-)
diff --git a/dom/base/nsContentUtils.cpp b/dom/base/nsContentUtils.cpp
@@ -224,6 +224,8 @@
#include "mozilla/gfx/Rect.h"
#include "mozilla/gfx/Types.h"
#include "mozilla/glean/GleanPings.h"
+#include "mozilla/htmlaccel/htmlaccelEnabled.h"
+#include "mozilla/htmlaccel/htmlaccelNotInline.h"
#include "mozilla/intl/LocaleService.h"
#include "mozilla/ipc/ProtocolUtils.h"
#include "mozilla/net/UrlClassifierCommon.h"
@@ -9873,6 +9875,16 @@ class BulkAppender {
size_type mPosition;
};
+class StringBuilderSIMD {
+ public:
+ static const bool SIMD = true;
+};
+
+class StringBuilderALU {
+ public:
+ static const bool SIMD = false;
+};
+
class StringBuilder {
private:
class Unit {
@@ -9993,6 +10005,12 @@ class StringBuilder {
BulkAppender appender{appenderOrErr.unwrap()};
+ // Experiment with moving this higher up the call stack
+ // after we have experience from
+ // https://bugzilla.mozilla.org/show_bug.cgi?id=1997255
+ // on the parser side.
+ bool simd = mozilla::htmlaccel::htmlaccelEnabled();
+
for (StringBuilder* current = this; current;
current = current->mNext.get()) {
uint32_t len = current->mUnits.Length();
@@ -10006,7 +10024,11 @@ class StringBuilder {
appender.Append(u.mString);
break;
case Unit::Type::StringWithEncode:
- EncodeAttrString(u.mString, appender);
+ if (simd) {
+ EncodeAttrString<StringBuilderSIMD>(u.mString, appender);
+ } else {
+ EncodeAttrString<StringBuilderALU>(u.mString, appender);
+ }
break;
case Unit::Type::Literal:
appender.Append(u.mLiteral.AsSpan());
@@ -10022,13 +10044,31 @@ class StringBuilder {
break;
case Unit::Type::TextFragmentWithEncode:
if (u.mCharacterDataBuffer->Is2b()) {
- EncodeTextFragment(Span(u.mCharacterDataBuffer->Get2b(),
- u.mCharacterDataBuffer->GetLength()),
- appender);
+ if (simd) {
+ EncodeTextFragment<StringBuilderSIMD>(
+ Span(u.mCharacterDataBuffer->Get2b(),
+ u.mCharacterDataBuffer->GetLength()),
+ appender);
+
+ } else {
+ EncodeTextFragment<StringBuilderALU>(
+ Span(u.mCharacterDataBuffer->Get2b(),
+ u.mCharacterDataBuffer->GetLength()),
+ appender);
+ }
} else {
- EncodeTextFragment(Span(u.mCharacterDataBuffer->Get1b(),
- u.mCharacterDataBuffer->GetLength()),
- appender);
+ if (simd) {
+ EncodeTextFragment<StringBuilderSIMD>(
+ Span(u.mCharacterDataBuffer->Get1b(),
+ u.mCharacterDataBuffer->GetLength()),
+ appender);
+
+ } else {
+ EncodeTextFragment<StringBuilderALU>(
+ Span(u.mCharacterDataBuffer->Get1b(),
+ u.mCharacterDataBuffer->GetLength()),
+ appender);
+ }
}
break;
default:
@@ -10054,81 +10094,144 @@ class StringBuilder {
aFirst->mLast = this;
}
+ template <class S>
void EncodeAttrString(Span<const char16_t> aStr, BulkAppender& aAppender) {
size_t flushedUntil = 0;
size_t currentPosition = 0;
- for (char16_t c : aStr) {
+ const char16_t* ptr = aStr.Elements();
+ const char16_t* end = ptr + aStr.Length();
+
+ // continue by label (committee proposal P3568) isn't in C++, yet, so
+ // goto is used for what's logically continuing an outer loop.
+ //
+ // The purpose of two loop levels is to efficiently stay in the inner
+ // loop when there isn't a full SIMD stride left.
+ //
+ // Strange indent thanks to clang-format.
+ outer:
+ // Check for having at least a SIMD stride of data to avoid useless
+ // non-inline function calls when there isn't a full stride left.
+ // This should become unnecessary if it turns out to be feasible
+ // to inline the call without triggering
+ // https://github.com/llvm/llvm-project/issues/160886 .
+ if (S::SIMD && (end - ptr >= 16)) {
+ size_t skipped =
+ mozilla::htmlaccel::SkipNonEscapedInAttributeValue(ptr, end);
+ ptr += skipped;
+ currentPosition += skipped;
+ }
+ while (ptr != end) {
+ char16_t c = *ptr;
+ ptr++;
switch (c) {
case '"':
aAppender.Append(aStr.FromTo(flushedUntil, currentPosition));
aAppender.AppendLiteral(u""");
- flushedUntil = currentPosition + 1;
- break;
+ currentPosition++;
+ flushedUntil = currentPosition;
+ goto outer;
case '&':
aAppender.Append(aStr.FromTo(flushedUntil, currentPosition));
aAppender.AppendLiteral(u"&");
- flushedUntil = currentPosition + 1;
- break;
+ currentPosition++;
+ flushedUntil = currentPosition;
+ goto outer;
case 0x00A0:
aAppender.Append(aStr.FromTo(flushedUntil, currentPosition));
aAppender.AppendLiteral(u" ");
- flushedUntil = currentPosition + 1;
- break;
+ currentPosition++;
+ flushedUntil = currentPosition;
+ goto outer;
case '<':
if (StaticPrefs::dom_security_html_serialization_escape_lt_gt()) {
aAppender.Append(aStr.FromTo(flushedUntil, currentPosition));
aAppender.AppendLiteral(u"<");
- flushedUntil = currentPosition + 1;
+ currentPosition++;
+ flushedUntil = currentPosition;
+ } else {
+ currentPosition++;
}
- break;
+ goto outer;
case '>':
if (StaticPrefs::dom_security_html_serialization_escape_lt_gt()) {
aAppender.Append(aStr.FromTo(flushedUntil, currentPosition));
aAppender.AppendLiteral(u">");
- flushedUntil = currentPosition + 1;
+ currentPosition++;
+ flushedUntil = currentPosition;
+ } else {
+ currentPosition++;
}
- break;
+ goto outer;
default:
- break;
+ currentPosition++;
+ continue;
}
- currentPosition++;
}
+ // Logically the outer loop ends here.
if (currentPosition > flushedUntil) {
aAppender.Append(aStr.FromTo(flushedUntil, currentPosition));
}
}
- template <class T>
+ template <class S, class T>
void EncodeTextFragment(Span<const T> aStr, BulkAppender& aAppender) {
size_t flushedUntil = 0;
size_t currentPosition = 0;
- for (T c : aStr) {
+ const T* ptr = aStr.Elements();
+ const T* end = ptr + aStr.Length();
+
+ // continue by label (committee proposal P3568) isn't in C++, yet, so
+ // goto is used for what's logically continuing an outer loop.
+ //
+ // The purpose of two loop levels is to efficiently stay in the inner
+ // loop when there isn't a full SIMD stride left.
+ //
+ // Strange indent thanks to clang-format.
+ outer:
+ // Check for having at least a SIMD stride of data to avoid useless
+ // non-inline function calls when there isn't a full stride left.
+ // This should become unnecessary if it turns out to be feasible
+ // to inline the call without triggering
+ // https://github.com/llvm/llvm-project/issues/160886 .
+ if (S::SIMD && (end - ptr >= 16)) {
+ size_t skipped = mozilla::htmlaccel::SkipNonEscapedInTextNode(ptr, end);
+ ptr += skipped;
+ currentPosition += skipped;
+ }
+ while (ptr != end) {
+ T c = *ptr;
+ ++ptr;
switch (c) {
case '<':
aAppender.Append(aStr.FromTo(flushedUntil, currentPosition));
aAppender.AppendLiteral(u"<");
- flushedUntil = currentPosition + 1;
- break;
+ currentPosition++;
+ flushedUntil = currentPosition;
+ goto outer;
case '>':
aAppender.Append(aStr.FromTo(flushedUntil, currentPosition));
aAppender.AppendLiteral(u">");
- flushedUntil = currentPosition + 1;
- break;
+ currentPosition++;
+ flushedUntil = currentPosition;
+ goto outer;
case '&':
aAppender.Append(aStr.FromTo(flushedUntil, currentPosition));
aAppender.AppendLiteral(u"&");
- flushedUntil = currentPosition + 1;
- break;
+ currentPosition++;
+ flushedUntil = currentPosition;
+ goto outer;
case T(0xA0):
aAppender.Append(aStr.FromTo(flushedUntil, currentPosition));
aAppender.AppendLiteral(u" ");
- flushedUntil = currentPosition + 1;
- break;
+ currentPosition++;
+ flushedUntil = currentPosition;
+ goto outer;
default:
- break;
+ currentPosition++;
+ continue;
}
- currentPosition++;
}
+ // Logically the outer loop ends here.
if (currentPosition > flushedUntil) {
aAppender.Append(aStr.FromTo(flushedUntil, currentPosition));
}
@@ -10153,32 +10256,42 @@ static void AppendEncodedCharacters(const CharacterDataBuffer* aText,
uint32_t len = aText->GetLength();
if (aText->Is2b()) {
const char16_t* data = aText->Get2b();
- for (uint32_t i = 0; i < len; ++i) {
- const char16_t c = data[i];
- switch (c) {
- case '<':
- case '>':
- case '&':
- case 0x00A0:
- ++numEncodedChars;
- break;
- default:
- break;
+ if (mozilla::htmlaccel::htmlaccelEnabled()) {
+ numEncodedChars =
+ mozilla::htmlaccel::CountEscapedInTextNode(data, data + len);
+ } else {
+ for (uint32_t i = 0; i < len; ++i) {
+ const char16_t c = data[i];
+ switch (c) {
+ case '<':
+ case '>':
+ case '&':
+ case 0x00A0:
+ ++numEncodedChars;
+ break;
+ default:
+ break;
+ }
}
}
} else {
const char* data = aText->Get1b();
- for (uint32_t i = 0; i < len; ++i) {
- const unsigned char c = data[i];
- switch (c) {
- case '<':
- case '>':
- case '&':
- case 0x00A0:
- ++numEncodedChars;
- break;
- default:
- break;
+ if (mozilla::htmlaccel::htmlaccelEnabled()) {
+ numEncodedChars =
+ mozilla::htmlaccel::CountEscapedInTextNode(data, data + len);
+ } else {
+ for (uint32_t i = 0; i < len; ++i) {
+ const unsigned char c = data[i];
+ switch (c) {
+ case '<':
+ case '>':
+ case '&':
+ case 0x00A0:
+ ++numEncodedChars;
+ break;
+ default:
+ break;
+ }
}
}
}
@@ -10208,19 +10321,23 @@ static CheckedInt<uint32_t> ExtraSpaceNeededForAttrEncoding(
const char16_t* end = aValue.EndReading();
uint32_t numEncodedChars = 0;
- while (c < end) {
- switch (*c) {
- case '"':
- case '&':
- case 0x00A0: // NO-BREAK SPACE
- case '<':
- case '>':
- ++numEncodedChars;
- break;
- default:
- break;
+ if (mozilla::htmlaccel::htmlaccelEnabled()) {
+ numEncodedChars = mozilla::htmlaccel::CountEscapedInAttributeValue(c, end);
+ } else {
+ while (c < end) {
+ switch (*c) {
+ case '"':
+ case '&':
+ case 0x00A0: // NO-BREAK SPACE
+ case '<':
+ case '>':
+ ++numEncodedChars;
+ break;
+ default:
+ break;
+ }
+ ++c;
}
- ++c;
}
if (!numEncodedChars) {
diff --git a/parser/htmlaccel/htmlaccel.h b/parser/htmlaccel/htmlaccel.h
@@ -203,6 +203,7 @@ namespace detail {
// 16 indicates the index of the leftmost match.
const uint8x16_t INVERTED_ADVANCES = {16, 15, 14, 13, 12, 11, 10, 9,
8, 7, 6, 5, 4, 3, 2, 1};
+const uint8x16_t ALL_ONES = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
MOZ_ALWAYS_INLINE_EVEN_DEBUG uint8x16_t TableLookup(uint8x16_t aTable,
uint8x16_t aNibbles) {
@@ -254,6 +255,13 @@ const uint8x16_t ZERO_LT_AMP_CR = {0, 2, 1, 1, 1, 1, '&', 1,
/// Disallow U+0000, less-than, ampersand, carriage return, and line feed.
const uint8x16_t ZERO_LT_AMP_CR_LF = {0, 2, 1, 1, 1, 1, '&', 1,
1, 1, '\n', 1, '<', '\r', 1, 1};
+/// Disallow less-than, greater-than, ampersand, and no-break space.
+const uint8x16_t LT_GT_AMP_NBSP = {0xA0, 2, 1, 1, 1, 1, '&', 1,
+ 1, 1, 1, 1, '<', 1, '>', 1};
+/// Disallow less-than, greater-than, ampersand, no-break space, and double
+/// quote.
+const uint8x16_t LT_GT_AMP_NBSP_QUOT = {0xA0, 2, '"', 1, 1, 1, '&', 1,
+ 1, 1, 1, 1, '<', 1, '>', 1};
/// Compute a 16-lane mask for for 16 UTF-16 code units, where a lane
/// is 0x00 if OK to skip and 0xFF in not OK to skip.
@@ -286,17 +294,31 @@ StrideToMask(const char16_t* aArr /* len = 16 */, uint8x16_t aTable,
return ret;
}
-MOZ_ALWAYS_INLINE_EVEN_DEBUG int32_t AccelerateTextNode(const char16_t* aInput,
- const char16_t* aEnd,
- uint8x16_t aTable,
- bool aAllowSurrogates) {
- const char16_t* current = aInput;
+/// Compute a 16-lane mask for for 16 Latin1 code units, where a lane
+/// is 0x00 if OK to skip and 0xFF in not OK to skip.
+/// `aAllowSurrogates` exist for signature compatibility with the UTF-16
+/// case and is unused.
+MOZ_ALWAYS_INLINE_EVEN_DEBUG uint8x16_t StrideToMask(
+ const char* aArr /* len = 16 */, uint8x16_t aTable, bool aAllowSurrogates) {
+ uint8x16_t stride;
+ // memcpy generates a single unaligned load instruction with both ISAs.
+ memcpy(&stride, aArr, 16);
+ // == compares lane-wise and returns a mask vector.
+ return stride == TableLookup(aTable, stride & NIBBLE_MASK);
+}
+
+template <typename CharT>
+MOZ_ALWAYS_INLINE_EVEN_DEBUG size_t AccelerateTextNode(const CharT* aInput,
+ const CharT* aEnd,
+ uint8x16_t aTable,
+ bool aAllowSurrogates) {
+ const CharT* current = aInput;
while (aEnd - current >= 16) {
uint8x16_t mask = StrideToMask(current, aTable, aAllowSurrogates);
#if defined(__aarch64__)
uint8_t max = vmaxvq_u8(mask & INVERTED_ADVANCES);
if (max != 0) {
- return int32_t((current - aInput) + 16 - max);
+ return size_t((current - aInput) + 16 - max);
}
#else // x86/x86_64
int int_mask = _mm_movemask_epi8(mask);
@@ -305,12 +327,42 @@ MOZ_ALWAYS_INLINE_EVEN_DEBUG int32_t AccelerateTextNode(const char16_t* aInput,
// the first SIMD lane in text order. Hence, we need to count
// trailing zeros. We already checked that the bits are not
// all zeros, so __builtin_ctz isn't UB.
- return int32_t((current - aInput) + __builtin_ctz(int_mask));
+ return size_t((current - aInput) + __builtin_ctz(int_mask));
}
#endif
current += 16;
}
- return int32_t(current - aInput);
+ return size_t(current - aInput);
+}
+
+template <typename CharT>
+MOZ_ALWAYS_INLINE_EVEN_DEBUG uint32_t CountEscaped(const CharT* aInput,
+ const CharT* aEnd,
+ bool aCountDoubleQuote) {
+ uint32_t numEncodedChars = 0;
+ const CharT* current = aInput;
+ while (aEnd - current >= 16) {
+ uint8x16_t mask = StrideToMask(
+ current, aCountDoubleQuote ? LT_GT_AMP_NBSP_QUOT : LT_GT_AMP_NBSP,
+ true);
+#if defined(__aarch64__)
+ // Reduce on each iteration to avoid branching for overflow avoidance
+ // on each iteration.
+ numEncodedChars += vaddvq_u8(mask & ALL_ONES);
+#else // x86_64
+ numEncodedChars += __builtin_popcount(_mm_movemask_epi8(mask));
+#endif
+ current += 16;
+ }
+ while (current != aEnd) {
+ CharT c = *current;
+ if ((aCountDoubleQuote && c == CharT('"')) || c == CharT('&') ||
+ c == CharT('<') || c == CharT('>') || c == CharT(0xA0)) {
+ ++numEncodedChars;
+ }
+ ++current;
+ }
+ return numEncodedChars;
}
MOZ_ALWAYS_INLINE_EVEN_DEBUG bool ContainsMarkup(const char16_t* aInput,
diff --git a/parser/htmlaccel/htmlaccelEnabled.h b/parser/htmlaccel/htmlaccelEnabled.h
@@ -5,6 +5,7 @@
#ifndef mozilla_htmlaccel_htmlaccelEnabled_h
#define mozilla_htmlaccel_htmlaccelEnabled_h
+#include "mozilla/Assertions.h"
#if defined(__x86_64__)
# include "mozilla/SSE.h"
#endif
@@ -23,7 +24,15 @@ inline bool htmlaccelEnabled() {
#elif defined(__aarch64__) && defined(__LITTLE_ENDIAN__)
return true;
#elif defined(__x86_64__)
- return mozilla::supports_bmi() && mozilla::supports_avx();
+ bool ret = mozilla::supports_bmi();
+ if (ret) {
+ // As a micro optimization for the innerHTML getter, don't bother
+ // calling `supports_avx` in release builds, since the implementation
+ // of SSE.h supports_bmi implies supports_avx anyway.
+ MOZ_ASSERT(mozilla::supports_avx(),
+ "supports_bmi is supposed to imply supports_avx");
+ }
+ return ret;
#else
return false;
#endif
diff --git a/parser/htmlaccel/htmlaccelNotInline.cpp b/parser/htmlaccel/htmlaccelNotInline.cpp
@@ -16,6 +16,47 @@ MOZ_NEVER_INLINE bool ContainsMarkup(const char16_t* aPtr,
return detail::ContainsMarkup(aPtr, aEnd);
}
+/// Skip over SIMD strides not containing less-than, greater-than, ampersand,
+/// and no-break space.
+MOZ_NEVER_INLINE size_t SkipNonEscapedInTextNode(const char16_t* aPtr,
+ const char16_t* aEnd) {
+ return detail::AccelerateTextNode(aPtr, aEnd, detail::LT_GT_AMP_NBSP, true);
+}
+
+/// Skip over SIMD strides not containing less-than, greater-than, ampersand,
+/// and no-break space.
+MOZ_NEVER_INLINE size_t SkipNonEscapedInTextNode(const char* aPtr,
+ const char* aEnd) {
+ return detail::AccelerateTextNode(aPtr, aEnd, detail::LT_GT_AMP_NBSP, true);
+}
+
+/// Skip over SIMD strides not containing less-than, greater-than, ampersand,
+/// no-break space, and double quote.
+MOZ_NEVER_INLINE size_t SkipNonEscapedInAttributeValue(const char16_t* aPtr,
+ const char16_t* aEnd) {
+ return detail::AccelerateTextNode(aPtr, aEnd, detail::LT_GT_AMP_NBSP_QUOT,
+ true);
+}
+
+/// Count occurrences of less-than, greater-than, ampersand, and no-break space.
+MOZ_NEVER_INLINE uint32_t CountEscapedInTextNode(const char16_t* aPtr,
+ const char16_t* aEnd) {
+ return detail::CountEscaped(aPtr, aEnd, false);
+}
+
+/// Count occurrences of less-than, greater-than, ampersand, and no-break space.
+MOZ_NEVER_INLINE uint32_t CountEscapedInTextNode(const char* aPtr,
+ const char* aEnd) {
+ return detail::CountEscaped(aPtr, aEnd, false);
+}
+
+/// Count occurrences of less-than, greater-than, ampersand, no-break space, and
+/// double quote.
+MOZ_NEVER_INLINE uint32_t CountEscapedInAttributeValue(const char16_t* aPtr,
+ const char16_t* aEnd) {
+ return detail::CountEscaped(aPtr, aEnd, true);
+}
+
/// The innerHTML / DOMParser case for the data state in the HTML parser
MOZ_NEVER_INLINE int32_t AccelerateDataFastest(const char16_t* aPtr,
const char16_t* aEnd) {
diff --git a/parser/htmlaccel/htmlaccelNotInline.h b/parser/htmlaccel/htmlaccelNotInline.h
@@ -17,6 +17,48 @@ namespace mozilla::htmlaccel {
MOZ_NEVER_INLINE bool ContainsMarkup(const char16_t* aPtr,
const char16_t* aEnd);
+// HTML Serializer functions
+
+// NOTE! The SkipNonEscaped functions only skip full SIMD strides, but the
+// CountEscaped functions also handle the tail that doesn't fit in a SIMD
+// stride.
+
+/// Returns the length of the prefix that does not contain less-than,
+/// greater-than, ampersand, or no-break space.
+/// Examines the input in multiples of 16 code units and does not examine
+/// the tail that is left over.
+MOZ_NEVER_INLINE size_t SkipNonEscapedInTextNode(const char16_t* aPtr,
+ const char16_t* aEnd);
+
+/// Returns the length of the prefix that does not contain less-than,
+/// greater-than, ampersand, or no-break space.
+/// Examines the input in multiples of 16 code units and does not examine
+/// the tail that is left over.
+MOZ_NEVER_INLINE size_t SkipNonEscapedInTextNode(const char* aPtr,
+ const char* aEnd);
+
+/// Returns the length of the prefix that does not contain less-than,
+/// greater-than, ampersand, no-break space, or double quote.
+/// Examines the input in multiples of 16 code units and does not examine
+/// the tail that is left over.
+MOZ_NEVER_INLINE size_t SkipNonEscapedInAttributeValue(const char16_t* aPtr,
+ const char16_t* aEnd);
+
+/// Count occurrences of less-than, greater-than, ampersand, and no-break space.
+MOZ_NEVER_INLINE uint32_t CountEscapedInTextNode(const char16_t* aPtr,
+ const char16_t* aEnd);
+
+/// Count occurrences of less-than, greater-than, ampersand, and no-break space.
+MOZ_NEVER_INLINE uint32_t CountEscapedInTextNode(const char* aPtr,
+ const char* aEnd);
+
+/// Count occurrences of less-than, greater-than, ampersand, no-break space, and
+/// double quote.
+MOZ_NEVER_INLINE uint32_t CountEscapedInAttributeValue(const char16_t* aPtr,
+ const char16_t* aEnd);
+
+// HTML Tokenizer functions
+
// Logically these should be MOZ_ALWAYS_INLINE_EVEN_DEBUG if LLVM was working
// as expected. However, these are MOZ_NEVER_INLINE to work around
// https://github.com/llvm/llvm-project/issues/160886 . This way, we get