tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit c553709e5cfb1b97c3e3def9fcf2f58e1b2bcfe0
parent 7b372211587fd5c7f90f369245de84b68eb6109a
Author: Henri Sivonen <hsivonen@hsivonen.fi>
Date:   Thu,  6 Nov 2025 08:35:14 +0000

Bug 1499684 - Accelerate text node and attribute value serialization using SIMD. r=smaug

Differential Revision: https://phabricator.services.mozilla.com/D271356

Diffstat:
Mdom/base/nsContentUtils.cpp | 249++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
Mparser/htmlaccel/htmlaccel.h | 68++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
Mparser/htmlaccel/htmlaccelEnabled.h | 11++++++++++-
Mparser/htmlaccel/htmlaccelNotInline.cpp | 41+++++++++++++++++++++++++++++++++++++++++
Mparser/htmlaccel/htmlaccelNotInline.h | 42++++++++++++++++++++++++++++++++++++++++++
5 files changed, 336 insertions(+), 75 deletions(-)

diff --git a/dom/base/nsContentUtils.cpp b/dom/base/nsContentUtils.cpp @@ -224,6 +224,8 @@ #include "mozilla/gfx/Rect.h" #include "mozilla/gfx/Types.h" #include "mozilla/glean/GleanPings.h" +#include "mozilla/htmlaccel/htmlaccelEnabled.h" +#include "mozilla/htmlaccel/htmlaccelNotInline.h" #include "mozilla/intl/LocaleService.h" #include "mozilla/ipc/ProtocolUtils.h" #include "mozilla/net/UrlClassifierCommon.h" @@ -9873,6 +9875,16 @@ class BulkAppender { size_type mPosition; }; +class StringBuilderSIMD { + public: + static const bool SIMD = true; +}; + +class StringBuilderALU { + public: + static const bool SIMD = false; +}; + class StringBuilder { private: class Unit { @@ -9993,6 +10005,12 @@ class StringBuilder { BulkAppender appender{appenderOrErr.unwrap()}; + // Experiment with moving this higher up the call stack + // after we have experience from + // https://bugzilla.mozilla.org/show_bug.cgi?id=1997255 + // on the parser side. + bool simd = mozilla::htmlaccel::htmlaccelEnabled(); + for (StringBuilder* current = this; current; current = current->mNext.get()) { uint32_t len = current->mUnits.Length(); @@ -10006,7 +10024,11 @@ class StringBuilder { appender.Append(u.mString); break; case Unit::Type::StringWithEncode: - EncodeAttrString(u.mString, appender); + if (simd) { + EncodeAttrString<StringBuilderSIMD>(u.mString, appender); + } else { + EncodeAttrString<StringBuilderALU>(u.mString, appender); + } break; case Unit::Type::Literal: appender.Append(u.mLiteral.AsSpan()); @@ -10022,13 +10044,31 @@ class StringBuilder { break; case Unit::Type::TextFragmentWithEncode: if (u.mCharacterDataBuffer->Is2b()) { - EncodeTextFragment(Span(u.mCharacterDataBuffer->Get2b(), - u.mCharacterDataBuffer->GetLength()), - appender); + if (simd) { + EncodeTextFragment<StringBuilderSIMD>( + Span(u.mCharacterDataBuffer->Get2b(), + u.mCharacterDataBuffer->GetLength()), + appender); + + } else { + EncodeTextFragment<StringBuilderALU>( + Span(u.mCharacterDataBuffer->Get2b(), + u.mCharacterDataBuffer->GetLength()), + appender); + } } else { - EncodeTextFragment(Span(u.mCharacterDataBuffer->Get1b(), - u.mCharacterDataBuffer->GetLength()), - appender); + if (simd) { + EncodeTextFragment<StringBuilderSIMD>( + Span(u.mCharacterDataBuffer->Get1b(), + u.mCharacterDataBuffer->GetLength()), + appender); + + } else { + EncodeTextFragment<StringBuilderALU>( + Span(u.mCharacterDataBuffer->Get1b(), + u.mCharacterDataBuffer->GetLength()), + appender); + } } break; default: @@ -10054,81 +10094,144 @@ class StringBuilder { aFirst->mLast = this; } + template <class S> void EncodeAttrString(Span<const char16_t> aStr, BulkAppender& aAppender) { size_t flushedUntil = 0; size_t currentPosition = 0; - for (char16_t c : aStr) { + const char16_t* ptr = aStr.Elements(); + const char16_t* end = ptr + aStr.Length(); + + // continue by label (committee proposal P3568) isn't in C++, yet, so + // goto is used for what's logically continuing an outer loop. + // + // The purpose of two loop levels is to efficiently stay in the inner + // loop when there isn't a full SIMD stride left. + // + // Strange indent thanks to clang-format. + outer: + // Check for having at least a SIMD stride of data to avoid useless + // non-inline function calls when there isn't a full stride left. + // This should become unnecessary if it turns out to be feasible + // to inline the call without triggering + // https://github.com/llvm/llvm-project/issues/160886 . + if (S::SIMD && (end - ptr >= 16)) { + size_t skipped = + mozilla::htmlaccel::SkipNonEscapedInAttributeValue(ptr, end); + ptr += skipped; + currentPosition += skipped; + } + while (ptr != end) { + char16_t c = *ptr; + ptr++; switch (c) { case '"': aAppender.Append(aStr.FromTo(flushedUntil, currentPosition)); aAppender.AppendLiteral(u"&quot;"); - flushedUntil = currentPosition + 1; - break; + currentPosition++; + flushedUntil = currentPosition; + goto outer; case '&': aAppender.Append(aStr.FromTo(flushedUntil, currentPosition)); aAppender.AppendLiteral(u"&amp;"); - flushedUntil = currentPosition + 1; - break; + currentPosition++; + flushedUntil = currentPosition; + goto outer; case 0x00A0: aAppender.Append(aStr.FromTo(flushedUntil, currentPosition)); aAppender.AppendLiteral(u"&nbsp;"); - flushedUntil = currentPosition + 1; - break; + currentPosition++; + flushedUntil = currentPosition; + goto outer; case '<': if (StaticPrefs::dom_security_html_serialization_escape_lt_gt()) { aAppender.Append(aStr.FromTo(flushedUntil, currentPosition)); aAppender.AppendLiteral(u"&lt;"); - flushedUntil = currentPosition + 1; + currentPosition++; + flushedUntil = currentPosition; + } else { + currentPosition++; } - break; + goto outer; case '>': if (StaticPrefs::dom_security_html_serialization_escape_lt_gt()) { aAppender.Append(aStr.FromTo(flushedUntil, currentPosition)); aAppender.AppendLiteral(u"&gt;"); - flushedUntil = currentPosition + 1; + currentPosition++; + flushedUntil = currentPosition; + } else { + currentPosition++; } - break; + goto outer; default: - break; + currentPosition++; + continue; } - currentPosition++; } + // Logically the outer loop ends here. if (currentPosition > flushedUntil) { aAppender.Append(aStr.FromTo(flushedUntil, currentPosition)); } } - template <class T> + template <class S, class T> void EncodeTextFragment(Span<const T> aStr, BulkAppender& aAppender) { size_t flushedUntil = 0; size_t currentPosition = 0; - for (T c : aStr) { + const T* ptr = aStr.Elements(); + const T* end = ptr + aStr.Length(); + + // continue by label (committee proposal P3568) isn't in C++, yet, so + // goto is used for what's logically continuing an outer loop. + // + // The purpose of two loop levels is to efficiently stay in the inner + // loop when there isn't a full SIMD stride left. + // + // Strange indent thanks to clang-format. + outer: + // Check for having at least a SIMD stride of data to avoid useless + // non-inline function calls when there isn't a full stride left. + // This should become unnecessary if it turns out to be feasible + // to inline the call without triggering + // https://github.com/llvm/llvm-project/issues/160886 . + if (S::SIMD && (end - ptr >= 16)) { + size_t skipped = mozilla::htmlaccel::SkipNonEscapedInTextNode(ptr, end); + ptr += skipped; + currentPosition += skipped; + } + while (ptr != end) { + T c = *ptr; + ++ptr; switch (c) { case '<': aAppender.Append(aStr.FromTo(flushedUntil, currentPosition)); aAppender.AppendLiteral(u"&lt;"); - flushedUntil = currentPosition + 1; - break; + currentPosition++; + flushedUntil = currentPosition; + goto outer; case '>': aAppender.Append(aStr.FromTo(flushedUntil, currentPosition)); aAppender.AppendLiteral(u"&gt;"); - flushedUntil = currentPosition + 1; - break; + currentPosition++; + flushedUntil = currentPosition; + goto outer; case '&': aAppender.Append(aStr.FromTo(flushedUntil, currentPosition)); aAppender.AppendLiteral(u"&amp;"); - flushedUntil = currentPosition + 1; - break; + currentPosition++; + flushedUntil = currentPosition; + goto outer; case T(0xA0): aAppender.Append(aStr.FromTo(flushedUntil, currentPosition)); aAppender.AppendLiteral(u"&nbsp;"); - flushedUntil = currentPosition + 1; - break; + currentPosition++; + flushedUntil = currentPosition; + goto outer; default: - break; + currentPosition++; + continue; } - currentPosition++; } + // Logically the outer loop ends here. if (currentPosition > flushedUntil) { aAppender.Append(aStr.FromTo(flushedUntil, currentPosition)); } @@ -10153,32 +10256,42 @@ static void AppendEncodedCharacters(const CharacterDataBuffer* aText, uint32_t len = aText->GetLength(); if (aText->Is2b()) { const char16_t* data = aText->Get2b(); - for (uint32_t i = 0; i < len; ++i) { - const char16_t c = data[i]; - switch (c) { - case '<': - case '>': - case '&': - case 0x00A0: - ++numEncodedChars; - break; - default: - break; + if (mozilla::htmlaccel::htmlaccelEnabled()) { + numEncodedChars = + mozilla::htmlaccel::CountEscapedInTextNode(data, data + len); + } else { + for (uint32_t i = 0; i < len; ++i) { + const char16_t c = data[i]; + switch (c) { + case '<': + case '>': + case '&': + case 0x00A0: + ++numEncodedChars; + break; + default: + break; + } } } } else { const char* data = aText->Get1b(); - for (uint32_t i = 0; i < len; ++i) { - const unsigned char c = data[i]; - switch (c) { - case '<': - case '>': - case '&': - case 0x00A0: - ++numEncodedChars; - break; - default: - break; + if (mozilla::htmlaccel::htmlaccelEnabled()) { + numEncodedChars = + mozilla::htmlaccel::CountEscapedInTextNode(data, data + len); + } else { + for (uint32_t i = 0; i < len; ++i) { + const unsigned char c = data[i]; + switch (c) { + case '<': + case '>': + case '&': + case 0x00A0: + ++numEncodedChars; + break; + default: + break; + } } } } @@ -10208,19 +10321,23 @@ static CheckedInt<uint32_t> ExtraSpaceNeededForAttrEncoding( const char16_t* end = aValue.EndReading(); uint32_t numEncodedChars = 0; - while (c < end) { - switch (*c) { - case '"': - case '&': - case 0x00A0: // NO-BREAK SPACE - case '<': - case '>': - ++numEncodedChars; - break; - default: - break; + if (mozilla::htmlaccel::htmlaccelEnabled()) { + numEncodedChars = mozilla::htmlaccel::CountEscapedInAttributeValue(c, end); + } else { + while (c < end) { + switch (*c) { + case '"': + case '&': + case 0x00A0: // NO-BREAK SPACE + case '<': + case '>': + ++numEncodedChars; + break; + default: + break; + } + ++c; } - ++c; } if (!numEncodedChars) { diff --git a/parser/htmlaccel/htmlaccel.h b/parser/htmlaccel/htmlaccel.h @@ -203,6 +203,7 @@ namespace detail { // 16 indicates the index of the leftmost match. const uint8x16_t INVERTED_ADVANCES = {16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}; +const uint8x16_t ALL_ONES = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; MOZ_ALWAYS_INLINE_EVEN_DEBUG uint8x16_t TableLookup(uint8x16_t aTable, uint8x16_t aNibbles) { @@ -254,6 +255,13 @@ const uint8x16_t ZERO_LT_AMP_CR = {0, 2, 1, 1, 1, 1, '&', 1, /// Disallow U+0000, less-than, ampersand, carriage return, and line feed. const uint8x16_t ZERO_LT_AMP_CR_LF = {0, 2, 1, 1, 1, 1, '&', 1, 1, 1, '\n', 1, '<', '\r', 1, 1}; +/// Disallow less-than, greater-than, ampersand, and no-break space. +const uint8x16_t LT_GT_AMP_NBSP = {0xA0, 2, 1, 1, 1, 1, '&', 1, + 1, 1, 1, 1, '<', 1, '>', 1}; +/// Disallow less-than, greater-than, ampersand, no-break space, and double +/// quote. +const uint8x16_t LT_GT_AMP_NBSP_QUOT = {0xA0, 2, '"', 1, 1, 1, '&', 1, + 1, 1, 1, 1, '<', 1, '>', 1}; /// Compute a 16-lane mask for for 16 UTF-16 code units, where a lane /// is 0x00 if OK to skip and 0xFF in not OK to skip. @@ -286,17 +294,31 @@ StrideToMask(const char16_t* aArr /* len = 16 */, uint8x16_t aTable, return ret; } -MOZ_ALWAYS_INLINE_EVEN_DEBUG int32_t AccelerateTextNode(const char16_t* aInput, - const char16_t* aEnd, - uint8x16_t aTable, - bool aAllowSurrogates) { - const char16_t* current = aInput; +/// Compute a 16-lane mask for for 16 Latin1 code units, where a lane +/// is 0x00 if OK to skip and 0xFF in not OK to skip. +/// `aAllowSurrogates` exist for signature compatibility with the UTF-16 +/// case and is unused. +MOZ_ALWAYS_INLINE_EVEN_DEBUG uint8x16_t StrideToMask( + const char* aArr /* len = 16 */, uint8x16_t aTable, bool aAllowSurrogates) { + uint8x16_t stride; + // memcpy generates a single unaligned load instruction with both ISAs. + memcpy(&stride, aArr, 16); + // == compares lane-wise and returns a mask vector. + return stride == TableLookup(aTable, stride & NIBBLE_MASK); +} + +template <typename CharT> +MOZ_ALWAYS_INLINE_EVEN_DEBUG size_t AccelerateTextNode(const CharT* aInput, + const CharT* aEnd, + uint8x16_t aTable, + bool aAllowSurrogates) { + const CharT* current = aInput; while (aEnd - current >= 16) { uint8x16_t mask = StrideToMask(current, aTable, aAllowSurrogates); #if defined(__aarch64__) uint8_t max = vmaxvq_u8(mask & INVERTED_ADVANCES); if (max != 0) { - return int32_t((current - aInput) + 16 - max); + return size_t((current - aInput) + 16 - max); } #else // x86/x86_64 int int_mask = _mm_movemask_epi8(mask); @@ -305,12 +327,42 @@ MOZ_ALWAYS_INLINE_EVEN_DEBUG int32_t AccelerateTextNode(const char16_t* aInput, // the first SIMD lane in text order. Hence, we need to count // trailing zeros. We already checked that the bits are not // all zeros, so __builtin_ctz isn't UB. - return int32_t((current - aInput) + __builtin_ctz(int_mask)); + return size_t((current - aInput) + __builtin_ctz(int_mask)); } #endif current += 16; } - return int32_t(current - aInput); + return size_t(current - aInput); +} + +template <typename CharT> +MOZ_ALWAYS_INLINE_EVEN_DEBUG uint32_t CountEscaped(const CharT* aInput, + const CharT* aEnd, + bool aCountDoubleQuote) { + uint32_t numEncodedChars = 0; + const CharT* current = aInput; + while (aEnd - current >= 16) { + uint8x16_t mask = StrideToMask( + current, aCountDoubleQuote ? LT_GT_AMP_NBSP_QUOT : LT_GT_AMP_NBSP, + true); +#if defined(__aarch64__) + // Reduce on each iteration to avoid branching for overflow avoidance + // on each iteration. + numEncodedChars += vaddvq_u8(mask & ALL_ONES); +#else // x86_64 + numEncodedChars += __builtin_popcount(_mm_movemask_epi8(mask)); +#endif + current += 16; + } + while (current != aEnd) { + CharT c = *current; + if ((aCountDoubleQuote && c == CharT('"')) || c == CharT('&') || + c == CharT('<') || c == CharT('>') || c == CharT(0xA0)) { + ++numEncodedChars; + } + ++current; + } + return numEncodedChars; } MOZ_ALWAYS_INLINE_EVEN_DEBUG bool ContainsMarkup(const char16_t* aInput, diff --git a/parser/htmlaccel/htmlaccelEnabled.h b/parser/htmlaccel/htmlaccelEnabled.h @@ -5,6 +5,7 @@ #ifndef mozilla_htmlaccel_htmlaccelEnabled_h #define mozilla_htmlaccel_htmlaccelEnabled_h +#include "mozilla/Assertions.h" #if defined(__x86_64__) # include "mozilla/SSE.h" #endif @@ -23,7 +24,15 @@ inline bool htmlaccelEnabled() { #elif defined(__aarch64__) && defined(__LITTLE_ENDIAN__) return true; #elif defined(__x86_64__) - return mozilla::supports_bmi() && mozilla::supports_avx(); + bool ret = mozilla::supports_bmi(); + if (ret) { + // As a micro optimization for the innerHTML getter, don't bother + // calling `supports_avx` in release builds, since the implementation + // of SSE.h supports_bmi implies supports_avx anyway. + MOZ_ASSERT(mozilla::supports_avx(), + "supports_bmi is supposed to imply supports_avx"); + } + return ret; #else return false; #endif diff --git a/parser/htmlaccel/htmlaccelNotInline.cpp b/parser/htmlaccel/htmlaccelNotInline.cpp @@ -16,6 +16,47 @@ MOZ_NEVER_INLINE bool ContainsMarkup(const char16_t* aPtr, return detail::ContainsMarkup(aPtr, aEnd); } +/// Skip over SIMD strides not containing less-than, greater-than, ampersand, +/// and no-break space. +MOZ_NEVER_INLINE size_t SkipNonEscapedInTextNode(const char16_t* aPtr, + const char16_t* aEnd) { + return detail::AccelerateTextNode(aPtr, aEnd, detail::LT_GT_AMP_NBSP, true); +} + +/// Skip over SIMD strides not containing less-than, greater-than, ampersand, +/// and no-break space. +MOZ_NEVER_INLINE size_t SkipNonEscapedInTextNode(const char* aPtr, + const char* aEnd) { + return detail::AccelerateTextNode(aPtr, aEnd, detail::LT_GT_AMP_NBSP, true); +} + +/// Skip over SIMD strides not containing less-than, greater-than, ampersand, +/// no-break space, and double quote. +MOZ_NEVER_INLINE size_t SkipNonEscapedInAttributeValue(const char16_t* aPtr, + const char16_t* aEnd) { + return detail::AccelerateTextNode(aPtr, aEnd, detail::LT_GT_AMP_NBSP_QUOT, + true); +} + +/// Count occurrences of less-than, greater-than, ampersand, and no-break space. +MOZ_NEVER_INLINE uint32_t CountEscapedInTextNode(const char16_t* aPtr, + const char16_t* aEnd) { + return detail::CountEscaped(aPtr, aEnd, false); +} + +/// Count occurrences of less-than, greater-than, ampersand, and no-break space. +MOZ_NEVER_INLINE uint32_t CountEscapedInTextNode(const char* aPtr, + const char* aEnd) { + return detail::CountEscaped(aPtr, aEnd, false); +} + +/// Count occurrences of less-than, greater-than, ampersand, no-break space, and +/// double quote. +MOZ_NEVER_INLINE uint32_t CountEscapedInAttributeValue(const char16_t* aPtr, + const char16_t* aEnd) { + return detail::CountEscaped(aPtr, aEnd, true); +} + /// The innerHTML / DOMParser case for the data state in the HTML parser MOZ_NEVER_INLINE int32_t AccelerateDataFastest(const char16_t* aPtr, const char16_t* aEnd) { diff --git a/parser/htmlaccel/htmlaccelNotInline.h b/parser/htmlaccel/htmlaccelNotInline.h @@ -17,6 +17,48 @@ namespace mozilla::htmlaccel { MOZ_NEVER_INLINE bool ContainsMarkup(const char16_t* aPtr, const char16_t* aEnd); +// HTML Serializer functions + +// NOTE! The SkipNonEscaped functions only skip full SIMD strides, but the +// CountEscaped functions also handle the tail that doesn't fit in a SIMD +// stride. + +/// Returns the length of the prefix that does not contain less-than, +/// greater-than, ampersand, or no-break space. +/// Examines the input in multiples of 16 code units and does not examine +/// the tail that is left over. +MOZ_NEVER_INLINE size_t SkipNonEscapedInTextNode(const char16_t* aPtr, + const char16_t* aEnd); + +/// Returns the length of the prefix that does not contain less-than, +/// greater-than, ampersand, or no-break space. +/// Examines the input in multiples of 16 code units and does not examine +/// the tail that is left over. +MOZ_NEVER_INLINE size_t SkipNonEscapedInTextNode(const char* aPtr, + const char* aEnd); + +/// Returns the length of the prefix that does not contain less-than, +/// greater-than, ampersand, no-break space, or double quote. +/// Examines the input in multiples of 16 code units and does not examine +/// the tail that is left over. +MOZ_NEVER_INLINE size_t SkipNonEscapedInAttributeValue(const char16_t* aPtr, + const char16_t* aEnd); + +/// Count occurrences of less-than, greater-than, ampersand, and no-break space. +MOZ_NEVER_INLINE uint32_t CountEscapedInTextNode(const char16_t* aPtr, + const char16_t* aEnd); + +/// Count occurrences of less-than, greater-than, ampersand, and no-break space. +MOZ_NEVER_INLINE uint32_t CountEscapedInTextNode(const char* aPtr, + const char* aEnd); + +/// Count occurrences of less-than, greater-than, ampersand, no-break space, and +/// double quote. +MOZ_NEVER_INLINE uint32_t CountEscapedInAttributeValue(const char16_t* aPtr, + const char16_t* aEnd); + +// HTML Tokenizer functions + // Logically these should be MOZ_ALWAYS_INLINE_EVEN_DEBUG if LLVM was working // as expected. However, these are MOZ_NEVER_INLINE to work around // https://github.com/llvm/llvm-project/issues/160886 . This way, we get