tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 4d2d424e3c1e9fc5df7162cd85d413627998727b
parent 81f5a1f4eb72c479f7759641ee564ee4c1eee038
Author: Henri Sivonen <hsivonen@hsivonen.fi>
Date:   Wed,  5 Nov 2025 10:25:53 +0000

Bug 1997859 - Accelerate ContainsMarkup in innerHTML setter with SIMD. r=smaug

Differential Revision: https://phabricator.services.mozilla.com/D271012

Diffstat:
M dom/base/FragmentOrElement.cpp | 17 +++++++++++++++++
M parser/htmlaccel/htmlaccel.h | 29 +++++++++++++++++++++++++++++
M parser/htmlaccel/htmlaccelNotInline.cpp | 9 +++++++++
M parser/htmlaccel/htmlaccelNotInline.h | 9 +++++++++
4 files changed, 64 insertions(+), 0 deletions(-)

diff --git a/dom/base/FragmentOrElement.cpp b/dom/base/FragmentOrElement.cpp @@ -73,6 +73,8 @@ #include "mozilla/dom/NodeListBinding.h" #include "mozilla/dom/SVGUseElement.h" #include "mozilla/dom/ShadowRoot.h" +#include "mozilla/htmlaccel/htmlaccelEnabled.h" +#include "mozilla/htmlaccel/htmlaccelNotInline.h" #include "nsCCUncollectableMarker.h" #include "nsChildContentList.h" #include "nsContentCreatorFunctions.h" @@ -1917,6 +1919,21 @@ static bool ContainsMarkup(const nsAString& aStr) { const char16_t* start = aStr.BeginReading(); const char16_t* end = aStr.EndReading(); + if (mozilla::htmlaccel::htmlaccelEnabled()) { + // We need to check for the empty string in order to + // dereference `start` for the '<' check. We might as well + // check that we have a full SIMD stride. + if (end - start >= 16) { + // Optimize the case where the input starts with a tag. + if (*start == u'<') { + return true; + } + // Curiously, this doesn't look like much of an optimization on Zen 3, + // but since it is an optimization on M3 Pro and Skylake, let's do this. 
+ return mozilla::htmlaccel::ContainsMarkup(start, end); + } + } + while (start != end) { char16_t c = *start; if (c == char16_t('<') || c == char16_t('&') || c == char16_t('\r') || diff --git a/parser/htmlaccel/htmlaccel.h b/parser/htmlaccel/htmlaccel.h @@ -313,6 +313,35 @@ MOZ_ALWAYS_INLINE_EVEN_DEBUG int32_t AccelerateTextNode(const char16_t* aInput, return int32_t(current - aInput); } +MOZ_ALWAYS_INLINE_EVEN_DEBUG bool ContainsMarkup(const char16_t* aInput, + const char16_t* aEnd) { + const char16_t* current = aInput; + while (aEnd - current >= 16) { + uint8x16_t mask = StrideToMask(current, ZERO_LT_AMP_CR, true); +#if defined(__aarch64__) + uint8_t max = vmaxvq_u8(mask); + if (max != 0) { + return true; + } +#else // x86/x86_64 + int int_mask = _mm_movemask_epi8(mask); + if (int_mask != 0) { + return true; + } +#endif + current += 16; + } + while (current != aEnd) { + char16_t c = *current; + if (c == char16_t('<') || c == char16_t('&') || c == char16_t('\r') || + c == char16_t('\0')) { + return true; + } + ++current; + } + return false; +} + } // namespace detail // Public entry points are in htmlaccelNotInline.h for now. diff --git a/parser/htmlaccel/htmlaccelNotInline.cpp b/parser/htmlaccel/htmlaccelNotInline.cpp @@ -7,6 +7,15 @@ namespace mozilla::htmlaccel { +// TODO: Perhaps inlining this one on aarch64 wouldn't run into the +// LLVM LICM vs. regalloc bug. But then, inlining this would only +// avoid the overhead of one function call and wouldn't reuse the +// SIMD constants in a useful way. 
+MOZ_NEVER_INLINE bool ContainsMarkup(const char16_t* aPtr, + const char16_t* aEnd) { + return detail::ContainsMarkup(aPtr, aEnd); +} + /// The innerHTML / DOMParser case for the data state in the HTML parser MOZ_NEVER_INLINE int32_t AccelerateDataFastest(const char16_t* aPtr, const char16_t* aEnd) { diff --git a/parser/htmlaccel/htmlaccelNotInline.h b/parser/htmlaccel/htmlaccelNotInline.h @@ -8,6 +8,15 @@ #include "mozilla/Attributes.h" namespace mozilla::htmlaccel { +// True iff the buffer contains less-than, ampersand, carriage return, +// or U+0000. +// +// This one could probably be inline without LLVM issues when SIMD +// acceleration is statically enabled, but it's probably not worth +// the complexity to do that. +MOZ_NEVER_INLINE bool ContainsMarkup(const char16_t* aPtr, + const char16_t* aEnd); + // Logically these should be MOZ_ALWAYS_INLINE_EVEN_DEBUG if LLVM was working // as expected. However, these are MOZ_NEVER_INLINE to work around // https://github.com/llvm/llvm-project/issues/160886 . This way, we get