tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 87c044b95f419b4d461eac12af9372e0df389307
parent 612f84975ef380bbaaf5939051f0c33d627c3318
Author: André Bargull <andre.bargull@gmail.com>
Date:   Mon, 20 Oct 2025 12:03:54 +0000

Bug 1994067 - Part 4: Improve performance of Uint8Array.fromBase64. r=spidermonkey-reviewers,iain

Changes:
- Add templates for shared and unshared memory and Latin-1 and Two-Byte strings
  like in part 3.
- Move the error reporting to the caller. This avoids hazard errors and should
  make it easier to integrate simdutf.
- Directly allocate a large enough byte vector in `fromBase64` to avoid fallible
  `append` operations.

For a 5KB Uint8Array, about 80-85% of the time is now spent in `FromBase64`. The
rest is call and allocation overhead.

Differential Revision: https://phabricator.services.mozilla.com/D269018

Diffstat:
Mjs/src/vm/TypedArrayObject.cpp | 382+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
1 file changed, 228 insertions(+), 154 deletions(-)

diff --git a/js/src/vm/TypedArrayObject.cpp b/js/src/vm/TypedArrayObject.cpp @@ -4320,56 +4320,6 @@ TypedArrayObject* js::TypedArraySubarrayRecover(JSContext* cx, using ByteVector = js::Vector<uint8_t, FixedLengthTypedArrayObject::INLINE_BUFFER_LIMIT>; -class ByteSink final { - ByteVector& bytes_; - - public: - explicit ByteSink(ByteVector& bytes) : bytes_(bytes) { - MOZ_ASSERT(bytes.empty()); - } - - constexpr bool canAppend(size_t n = 1) const { return true; } - - template <typename... Args> - bool append(Args... args) { - if (!bytes_.reserve(bytes_.length() + sizeof...(args))) { - return false; - } - (bytes_.infallibleAppend(args), ...); - return true; - } -}; - -class TypedArraySink final { - Handle<TypedArrayObject*> typedArray_; - size_t maxLength_; - size_t index_ = 0; - - public: - TypedArraySink(Handle<TypedArrayObject*> typedArray, size_t maxLength) - : typedArray_(typedArray), maxLength_(maxLength) { - MOZ_ASSERT(typedArray->type() == Scalar::Uint8); - - // The underlying buffer must neither be detached nor shrunk. (It may have - // been grown when it's a growable shared buffer and a concurrent thread - // resized the buffer.) - MOZ_ASSERT(!typedArray->hasDetachedBuffer()); - MOZ_ASSERT(typedArray->length().valueOr(0) >= maxLength); - } - - size_t written() const { return index_; } - - bool canAppend(size_t n = 1) const { return maxLength_ - index_ >= n; } - - template <typename... Args> - bool append(Args... 
args) { - MOZ_ASSERT(canAppend(sizeof...(args))); - (TypedArrayObjectTemplate<uint8_t>::setIndex(*typedArray_, index_++, args), - ...); - return true; - } -}; - static UniqueChars QuoteString(JSContext* cx, char16_t ch) { Sprinter sprinter(cx); if (!sprinter.init()) { @@ -4632,74 +4582,135 @@ enum class LastChunkHandling { StopBeforePartial, }; +enum class Base64Error { + None, + BadChar, + BadCharAfterPadding, + IncompleteChunk, + MissingPadding, + ExtraBits, +}; + +struct Base64Result { + Base64Error error; + size_t index; + size_t written; + + bool isError() const { return error != Base64Error::None; } + + static auto Ok(size_t index, size_t written) { + return Base64Result{Base64Error::None, index, written}; + } + + static auto Error(Base64Error error) { + MOZ_ASSERT(error != Base64Error::None); + return Base64Result{error, 0, 0}; + } + + static auto ErrorAt(Base64Error error, size_t index) { + MOZ_ASSERT(error != Base64Error::None); + return Base64Result{error, index, 0}; + } +}; + +static void ReportBase64Error(JSContext* cx, Base64Result result, + JSLinearString* string) { + MOZ_ASSERT(result.isError()); + switch (result.error) { + case Base64Error::None: + break; + case Base64Error::BadChar: + if (auto str = + QuoteString(cx, string->latin1OrTwoByteChar(result.index))) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_TYPED_ARRAY_BAD_BASE64_CHAR, str.get()); + } + return; + case Base64Error::BadCharAfterPadding: + if (auto str = + QuoteString(cx, string->latin1OrTwoByteChar(result.index))) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_TYPED_ARRAY_BAD_BASE64_AFTER_PADDING, + str.get()); + } + return; + case Base64Error::IncompleteChunk: + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_TYPED_ARRAY_BAD_INCOMPLETE_CHUNK); + return; + case Base64Error::MissingPadding: + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_TYPED_ARRAY_MISSING_BASE64_PADDING); + return; + case 
Base64Error::ExtraBits: + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_TYPED_ARRAY_EXTRA_BASE64_BITS); + return; + } + MOZ_CRASH("unexpected base64 error"); +} + /** * FromBase64 ( string, alphabet, lastChunkHandling [ , maxLength ] ) * * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 */ -template <class Sink> -static bool FromBase64(JSContext* cx, Handle<JSString*> string, - Alphabet alphabet, LastChunkHandling lastChunkHandling, - Sink& sink, size_t* readLength) { - // Steps 1-2. (Not applicable in our implementation.) +template <class Ops, typename CharT> +static auto FromBase64(const CharT* chars, size_t length, Alphabet alphabet, + LastChunkHandling lastChunkHandling, + SharedMem<uint8_t*> data, size_t maxLength) { + const SharedMem<uint8_t*> dataBegin = data; + const SharedMem<uint8_t*> dataEnd = data + maxLength; - // Step 3. - if (!sink.canAppend()) { - *readLength = 0; - return true; - } + auto canAppend = [&](size_t n) { return data + n <= dataEnd; }; - JSLinearString* linear = string->ensureLinear(cx); - if (!linear) { - return false; - } + auto written = [&]() { return data.unwrap() - dataBegin.unwrap(); }; // DecodeBase64Chunk ( chunk [ , throwOnExtraBits ] ) // // Encode a complete base64 chunk. auto decodeChunk = [&](uint32_t chunk) { MOZ_ASSERT(chunk <= 0xffffff); - return sink.append(chunk >> 16, chunk >> 8, chunk); + MOZ_ASSERT(canAppend(3)); + Ops::store(data++, uint8_t(chunk >> 16)); + Ops::store(data++, uint8_t(chunk >> 8)); + Ops::store(data++, uint8_t(chunk)); }; // DecodeBase64Chunk ( chunk [ , throwOnExtraBits ] ) // // Encode a three element partial base64 chunk. 
- auto decodeChunk3 = [&](uint32_t chunk, bool throwOnExtraBits) { + auto decodeChunk3 = [&](uint32_t chunk) { MOZ_ASSERT(chunk <= 0x3ffff); - - if (throwOnExtraBits && (chunk & 0x3) != 0) { - JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, - JSMSG_TYPED_ARRAY_EXTRA_BASE64_BITS); - return false; - } - return sink.append(chunk >> 10, chunk >> 2); + MOZ_ASSERT(canAppend(2)); + Ops::store(data++, uint8_t(chunk >> 10)); + Ops::store(data++, uint8_t(chunk >> 2)); }; // DecodeBase64Chunk ( chunk [ , throwOnExtraBits ] ) // // Encode a two element partial base64 chunk. - auto decodeChunk2 = [&](uint32_t chunk, bool throwOnExtraBits) { + auto decodeChunk2 = [&](uint32_t chunk) { MOZ_ASSERT(chunk <= 0xfff); - - if (throwOnExtraBits && (chunk & 0xf) != 0) { - JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, - JSMSG_TYPED_ARRAY_EXTRA_BASE64_BITS); - return false; - } - return sink.append(chunk >> 4); + MOZ_ASSERT(canAppend(1)); + Ops::store(data++, uint8_t(chunk >> 4)); }; // DecodeBase64Chunk ( chunk [ , throwOnExtraBits ] ) // // Encode a partial base64 chunk. - auto decodePartialChunk = [&](uint32_t chunk, uint32_t chunkLength, - bool throwOnExtraBits = false) { + auto decodePartialChunk = [&](uint32_t chunk, uint32_t chunkLength) { MOZ_ASSERT(chunkLength == 2 || chunkLength == 3); - return chunkLength == 2 ? decodeChunk2(chunk, throwOnExtraBits) - : decodeChunk3(chunk, throwOnExtraBits); + chunkLength == 2 ? decodeChunk2(chunk) : decodeChunk3(chunk); }; + // Steps 1-2. (Not applicable in our implementation.) + + // Step 3. + if (maxLength == 0) { + return Base64Result::Ok(0, 0); + } + // Step 4. // // String index after the last fully read base64 chunk. @@ -4722,8 +4733,7 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string, // Current string index. size_t index = 0; - // Step 9. - size_t length = linear->length(); + // Step 9. (Passed as parameter) const auto& decode = alphabet == Alphabet::Base64 ? 
Base64::Decode::Base64 : Base64::Decode::Base64Url; @@ -4731,7 +4741,7 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string, // Step 10. for (; index < length; index++) { // Step 10.c. (Reordered) - char16_t ch = linear->latin1OrTwoByteChar(index); + auto ch = chars[index]; // Step 10.a. if (mozilla::IsAsciiWhitespace(ch)) { @@ -4753,19 +4763,14 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string, value = decode[ch]; } if (MOZ_UNLIKELY(value == Base64::InvalidChar)) { - if (auto str = QuoteString(cx, ch)) { - JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, - JSMSG_TYPED_ARRAY_BAD_BASE64_CHAR, str.get()); - } - return false; + return Base64Result::ErrorAt(Base64Error::BadChar, index); } // Step 10.h. (Not applicable in our implementation.) // Step 10.i. - if (chunkLength > 1 && !sink.canAppend(chunkLength)) { - *readLength = read; - return true; + if (chunkLength > 1 && !canAppend(chunkLength)) { + return Base64Result::Ok(read, written()); } // Step 10.j. @@ -4777,9 +4782,7 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string, // Step 10.l. if (chunkLength == 4) { // Step 10.l.i. - if (!decodeChunk(chunk)) { - return false; - } + decodeChunk(chunk); // Step 10.l.ii. chunk = 0; @@ -4793,9 +4796,8 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string, read = index + 1; // Step 10.l.v. - if (!sink.canAppend()) { - *readLength = read; - return true; + if (!canAppend(1)) { + return Base64Result::Ok(read, written()); } } } @@ -4806,55 +4808,45 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string, if (chunkLength > 0) { // Step 10.b.i.1. if (lastChunkHandling == LastChunkHandling::StopBeforePartial) { - *readLength = read; - return true; + return Base64Result::Ok(read, written()); } // Steps 10.b.i.2-3. if (lastChunkHandling == LastChunkHandling::Loose) { // Step 10.b.i.2.a. 
if (chunkLength == 1) { - JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, - JSMSG_TYPED_ARRAY_BAD_INCOMPLETE_CHUNK); - return false; + return Base64Result::Error(Base64Error::IncompleteChunk); } MOZ_ASSERT(chunkLength == 2 || chunkLength == 3); // Step 10.b.i.2.b. - if (!decodePartialChunk(chunk, chunkLength)) { - return false; - } + decodePartialChunk(chunk, chunkLength); } else { // Step 10.b.i.3.a. MOZ_ASSERT(lastChunkHandling == LastChunkHandling::Strict); // Step 10.b.i.3.b. - JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, - JSMSG_TYPED_ARRAY_BAD_INCOMPLETE_CHUNK); - return false; + return Base64Result::Error(Base64Error::IncompleteChunk); } } // Step 10.b.ii. - *readLength = length; - return true; + return Base64Result::Ok(length, written()); } // Step 10.e. MOZ_ASSERT(index < length); - MOZ_ASSERT(linear->latin1OrTwoByteChar(index) == '='); + MOZ_ASSERT(chars[index] == '='); // Step 10.e.i. if (chunkLength < 2) { - JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, - JSMSG_TYPED_ARRAY_BAD_INCOMPLETE_CHUNK); - return false; + return Base64Result::Error(Base64Error::IncompleteChunk); } MOZ_ASSERT(chunkLength == 2 || chunkLength == 3); // Step 10.e.ii. (Inlined SkipAsciiWhitespace) while (++index < length) { - char16_t ch = linear->latin1OrTwoByteChar(index); + auto ch = chars[index]; if (!mozilla::IsAsciiWhitespace(ch)) { break; } @@ -4866,24 +4858,21 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string, if (index == length) { // Step 10.e.iii.1.a. if (lastChunkHandling == LastChunkHandling::StopBeforePartial) { - *readLength = read; - return true; + return Base64Result::Ok(read, written()); } // Step 10.e.iii.1.b. - JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, - JSMSG_TYPED_ARRAY_MISSING_BASE64_PADDING); - return false; + return Base64Result::Error(Base64Error::MissingPadding); } // Step 10.e.iii.2. - char16_t ch = linear->latin1OrTwoByteChar(index); + auto ch = chars[index]; // Step 10.e.iii.3. 
if (ch == '=') { // Step 10.e.iii.3.a. (Inlined SkipAsciiWhitespace) while (++index < length) { - char16_t ch = linear->latin1OrTwoByteChar(index); + auto ch = chars[index]; if (!mozilla::IsAsciiWhitespace(ch)) { break; } @@ -4893,26 +4882,79 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string, // Step 10.e.iv. if (index < length) { - char16_t ch = linear->latin1OrTwoByteChar(index); - if (auto str = QuoteString(cx, ch)) { - JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, - JSMSG_TYPED_ARRAY_BAD_BASE64_AFTER_PADDING, - str.get()); - } - return false; + return Base64Result::ErrorAt(Base64Error::BadCharAfterPadding, index); } // Steps 10.e.v-vi. - bool throwOnExtraBits = lastChunkHandling == LastChunkHandling::Strict; + if (lastChunkHandling == LastChunkHandling::Strict) { + uint32_t extraBitsMask = chunkLength == 2 ? 0xf : 0x3; + if ((chunk & extraBitsMask) != 0) { + return Base64Result::Error(Base64Error::ExtraBits); + } + } // Step 10.e.vii. - if (!decodePartialChunk(chunk, chunkLength, throwOnExtraBits)) { - return false; - } + decodePartialChunk(chunk, chunkLength); // Step 10.e.viii. 
- *readLength = length; - return true; + return Base64Result::Ok(length, written()); +} + +/** + * FromBase64 ( string, alphabet, lastChunkHandling [ , maxLength ] ) + * + * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 + */ +template <class Ops> +static auto FromBase64(JSLinearString* string, Alphabet alphabet, + LastChunkHandling lastChunkHandling, + SharedMem<uint8_t*> data, size_t maxLength) { + JS::AutoCheckCannotGC nogc; + if (string->hasLatin1Chars()) { + return FromBase64<Ops>(string->latin1Chars(nogc), string->length(), + alphabet, lastChunkHandling, data, maxLength); + } + return FromBase64<Ops>(string->twoByteChars(nogc), string->length(), alphabet, + lastChunkHandling, data, maxLength); +} + +/** + * FromBase64 ( string, alphabet, lastChunkHandling [ , maxLength ] ) + * + * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 + */ +static auto FromBase64(JSLinearString* string, Alphabet alphabet, + LastChunkHandling lastChunkHandling, + TypedArrayObject* tarray, size_t maxLength) { + MOZ_ASSERT(tarray->type() == Scalar::Uint8); + + // The underlying buffer must neither be detached nor shrunk. (It may have + // been grown when it's a growable shared buffer and a concurrent thread + // resized the buffer.) 
+ MOZ_ASSERT(!tarray->hasDetachedBuffer()); + MOZ_ASSERT(tarray->length().valueOr(0) >= maxLength); + + auto data = tarray->dataPointerEither().cast<uint8_t*>(); + + if (tarray->isSharedMemory()) { + return FromBase64<SharedOps>(string, alphabet, lastChunkHandling, data, + maxLength); + } + return FromBase64<UnsharedOps>(string, alphabet, lastChunkHandling, data, + maxLength); +} + +/** + * FromBase64 ( string, alphabet, lastChunkHandling [ , maxLength ] ) + * + * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 + */ +static auto FromBase64(JSLinearString* string, Alphabet alphabet, + LastChunkHandling lastChunkHandling, ByteVector& bytes) { + auto data = SharedMem<uint8_t*>::unshared(bytes.begin()); + size_t maxLength = bytes.length(); + return FromBase64<UnsharedOps>(string, alphabet, lastChunkHandling, data, + maxLength); } /** @@ -5063,17 +5105,38 @@ static bool uint8array_fromBase64(JSContext* cx, unsigned argc, Value* vp) { } } + // Compute the output byte length. Four input characters are decoded into + // three bytes, so the output length can't be larger than ⌈length × 3/4⌉. + auto outLength = mozilla::CheckedInt<size_t>{string->length()}; + outLength += 3; + outLength /= 4; + outLength *= 3; + MOZ_ASSERT(outLength.isValid(), "can't overflow"); + + static_assert(JSString::MAX_LENGTH <= TypedArrayObject::ByteLengthLimit, + "string length doesn't exceed maximum typed array length"); + // Step 10. 
ByteVector bytes(cx); - ByteSink sink{bytes}; - size_t unusedReadLength; - if (!FromBase64(cx, string, alphabet, lastChunkHandling, sink, - &unusedReadLength)) { + if (!bytes.resizeUninitialized(outLength.value())) { + return false; + } + + JSLinearString* linear = string->ensureLinear(cx); + if (!linear) { + return false; + } + + auto result = FromBase64(linear, alphabet, lastChunkHandling, bytes); + if (MOZ_UNLIKELY(result.isError())) { + ReportBase64Error(cx, result, linear); return false; } + MOZ_ASSERT(result.index <= linear->length()); + MOZ_ASSERT(result.written <= bytes.length()); // Step 11. - size_t resultLength = bytes.length(); + size_t resultLength = result.written; // Step 12. auto* tarray = @@ -5189,16 +5252,27 @@ static bool uint8array_setFromBase64(JSContext* cx, const CallArgs& args) { return false; } - // Steps 15-17. - ByteVector bytes(cx); - TypedArraySink sink{tarray, *length}; - size_t readLength; - if (!FromBase64(cx, string, alphabet, lastChunkHandling, sink, &readLength)) { - return false; - } + // Steps 15-18. + size_t readLength = 0; + size_t written = 0; + if (*length > 0) { + JSLinearString* linear = string->ensureLinear(cx); + if (!linear) { + return false; + } - // Step 18. - size_t written = sink.written(); + auto result = + FromBase64(linear, alphabet, lastChunkHandling, tarray, *length); + if (MOZ_UNLIKELY(result.isError())) { + ReportBase64Error(cx, result, linear); + return false; + } + MOZ_ASSERT(result.index <= linear->length()); + MOZ_ASSERT(result.written <= *length); + + readLength = result.index; + written = result.written; + } // Steps 19-21. (Not applicable in our implementation.)