commit 87c044b95f419b4d461eac12af9372e0df389307
parent 612f84975ef380bbaaf5939051f0c33d627c3318
Author: André Bargull <andre.bargull@gmail.com>
Date: Mon, 20 Oct 2025 12:03:54 +0000
Bug 1994067 - Part 4: Improve performance of Uint8Array.fromBase64. r=spidermonkey-reviewers,iain
Changes:
- Add templates for shared and unshared memory and Latin-1 and Two-Byte strings
like in part 3.
- Move the error reporting to the caller. This avoids hazard errors and should
make it easier to integrate simdutf.
- Directly allocate a large enough byte vector in `fromBase64` to avoid fallible
`append` operations.
For a 5KB Uint8Array, about 80-85% of the time is now spent in `FromBase64`. The
rest is call and allocation overhead.
Differential Revision: https://phabricator.services.mozilla.com/D269018
Diffstat:
1 file changed, 228 insertions(+), 154 deletions(-)
diff --git a/js/src/vm/TypedArrayObject.cpp b/js/src/vm/TypedArrayObject.cpp
@@ -4320,56 +4320,6 @@ TypedArrayObject* js::TypedArraySubarrayRecover(JSContext* cx,
using ByteVector =
js::Vector<uint8_t, FixedLengthTypedArrayObject::INLINE_BUFFER_LIMIT>;
-class ByteSink final {
- ByteVector& bytes_;
-
- public:
- explicit ByteSink(ByteVector& bytes) : bytes_(bytes) {
- MOZ_ASSERT(bytes.empty());
- }
-
- constexpr bool canAppend(size_t n = 1) const { return true; }
-
- template <typename... Args>
- bool append(Args... args) {
- if (!bytes_.reserve(bytes_.length() + sizeof...(args))) {
- return false;
- }
- (bytes_.infallibleAppend(args), ...);
- return true;
- }
-};
-
-class TypedArraySink final {
- Handle<TypedArrayObject*> typedArray_;
- size_t maxLength_;
- size_t index_ = 0;
-
- public:
- TypedArraySink(Handle<TypedArrayObject*> typedArray, size_t maxLength)
- : typedArray_(typedArray), maxLength_(maxLength) {
- MOZ_ASSERT(typedArray->type() == Scalar::Uint8);
-
- // The underlying buffer must neither be detached nor shrunk. (It may have
- // been grown when it's a growable shared buffer and a concurrent thread
- // resized the buffer.)
- MOZ_ASSERT(!typedArray->hasDetachedBuffer());
- MOZ_ASSERT(typedArray->length().valueOr(0) >= maxLength);
- }
-
- size_t written() const { return index_; }
-
- bool canAppend(size_t n = 1) const { return maxLength_ - index_ >= n; }
-
- template <typename... Args>
- bool append(Args... args) {
- MOZ_ASSERT(canAppend(sizeof...(args)));
- (TypedArrayObjectTemplate<uint8_t>::setIndex(*typedArray_, index_++, args),
- ...);
- return true;
- }
-};
-
static UniqueChars QuoteString(JSContext* cx, char16_t ch) {
Sprinter sprinter(cx);
if (!sprinter.init()) {
@@ -4632,74 +4582,135 @@ enum class LastChunkHandling {
StopBeforePartial,
};
+enum class Base64Error {  // Outcome kind stored in Base64Result::error.
+ None,  // No error; Base64Result::isError() is false.
+ BadChar,  // Reported as JSMSG_TYPED_ARRAY_BAD_BASE64_CHAR.
+ BadCharAfterPadding,  // JSMSG_TYPED_ARRAY_BAD_BASE64_AFTER_PADDING.
+ IncompleteChunk,  // JSMSG_TYPED_ARRAY_BAD_INCOMPLETE_CHUNK.
+ MissingPadding,  // JSMSG_TYPED_ARRAY_MISSING_BASE64_PADDING.
+ ExtraBits,  // JSMSG_TYPED_ARRAY_EXTRA_BASE64_BITS.
+};
+
+struct Base64Result {  // Value produced by the FromBase64 overloads.
+ Base64Error error;  // Base64Error::None on success.
+ size_t index;  // Ok: read length in the string. ErrorAt: offending index.
+ size_t written;  // Ok: number of bytes stored. Always 0 for errors.
+
+ bool isError() const { return error != Base64Error::None; }
+
+ static auto Ok(size_t index, size_t written) {
+ return Base64Result{Base64Error::None, index, written};
+ }
+
+ static auto Error(Base64Error error) {  // Error without a string position.
+ MOZ_ASSERT(error != Base64Error::None);
+ return Base64Result{error, 0, 0};
+ }
+
+ static auto ErrorAt(Base64Error error, size_t index) {  // Error at |index|.
+ MOZ_ASSERT(error != Base64Error::None);
+ return Base64Result{error, index, 0};
+ }
+};
+
+static void ReportBase64Error(JSContext* cx, Base64Result result,
+ JSLinearString* string) {  // Reports |result.error| on |cx|.
+ MOZ_ASSERT(result.isError());
+ switch (result.error) {
+ case Base64Error::None:
+ break;  // Leaves the switch and reaches MOZ_CRASH below.
+ case Base64Error::BadChar:  // Message includes the quoted character.
+ if (auto str =
+ QuoteString(cx, string->latin1OrTwoByteChar(result.index))) {
+ JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
+ JSMSG_TYPED_ARRAY_BAD_BASE64_CHAR, str.get());
+ }  // NOTE(review): no report here if QuoteString fails; verify it reports.
+ return;
+ case Base64Error::BadCharAfterPadding:  // Same shape as BadChar.
+ if (auto str =
+ QuoteString(cx, string->latin1OrTwoByteChar(result.index))) {
+ JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
+ JSMSG_TYPED_ARRAY_BAD_BASE64_AFTER_PADDING,
+ str.get());
+ }
+ return;
+ case Base64Error::IncompleteChunk:  // Remaining cases ignore result.index.
+ JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
+ JSMSG_TYPED_ARRAY_BAD_INCOMPLETE_CHUNK);
+ return;
+ case Base64Error::MissingPadding:
+ JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
+ JSMSG_TYPED_ARRAY_MISSING_BASE64_PADDING);
+ return;
+ case Base64Error::ExtraBits:
+ JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
+ JSMSG_TYPED_ARRAY_EXTRA_BASE64_BITS);
+ return;
+ }
+ MOZ_CRASH("unexpected base64 error");
+}
+
/**
* FromBase64 ( string, alphabet, lastChunkHandling [ , maxLength ] )
*
* https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64
*/
-template <class Sink>
-static bool FromBase64(JSContext* cx, Handle<JSString*> string,
- Alphabet alphabet, LastChunkHandling lastChunkHandling,
- Sink& sink, size_t* readLength) {
- // Steps 1-2. (Not applicable in our implementation.)
+template <class Ops, typename CharT>
+static auto FromBase64(const CharT* chars, size_t length, Alphabet alphabet,
+ LastChunkHandling lastChunkHandling,
+ SharedMem<uint8_t*> data, size_t maxLength) {
+ const SharedMem<uint8_t*> dataBegin = data;
+ const SharedMem<uint8_t*> dataEnd = data + maxLength;
- // Step 3.
- if (!sink.canAppend()) {
- *readLength = 0;
- return true;
- }
+ auto canAppend = [&](size_t n) { return data + n <= dataEnd; };
- JSLinearString* linear = string->ensureLinear(cx);
- if (!linear) {
- return false;
- }
+ auto written = [&]() { return data.unwrap() - dataBegin.unwrap(); };
// DecodeBase64Chunk ( chunk [ , throwOnExtraBits ] )
//
// Encode a complete base64 chunk.
auto decodeChunk = [&](uint32_t chunk) {
MOZ_ASSERT(chunk <= 0xffffff);
- return sink.append(chunk >> 16, chunk >> 8, chunk);
+ MOZ_ASSERT(canAppend(3));
+ Ops::store(data++, uint8_t(chunk >> 16));
+ Ops::store(data++, uint8_t(chunk >> 8));
+ Ops::store(data++, uint8_t(chunk));
};
// DecodeBase64Chunk ( chunk [ , throwOnExtraBits ] )
//
// Encode a three element partial base64 chunk.
- auto decodeChunk3 = [&](uint32_t chunk, bool throwOnExtraBits) {
+ auto decodeChunk3 = [&](uint32_t chunk) {
MOZ_ASSERT(chunk <= 0x3ffff);
-
- if (throwOnExtraBits && (chunk & 0x3) != 0) {
- JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
- JSMSG_TYPED_ARRAY_EXTRA_BASE64_BITS);
- return false;
- }
- return sink.append(chunk >> 10, chunk >> 2);
+ MOZ_ASSERT(canAppend(2));
+ Ops::store(data++, uint8_t(chunk >> 10));
+ Ops::store(data++, uint8_t(chunk >> 2));
};
// DecodeBase64Chunk ( chunk [ , throwOnExtraBits ] )
//
// Encode a two element partial base64 chunk.
- auto decodeChunk2 = [&](uint32_t chunk, bool throwOnExtraBits) {
+ auto decodeChunk2 = [&](uint32_t chunk) {
MOZ_ASSERT(chunk <= 0xfff);
-
- if (throwOnExtraBits && (chunk & 0xf) != 0) {
- JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
- JSMSG_TYPED_ARRAY_EXTRA_BASE64_BITS);
- return false;
- }
- return sink.append(chunk >> 4);
+ MOZ_ASSERT(canAppend(1));
+ Ops::store(data++, uint8_t(chunk >> 4));
};
// DecodeBase64Chunk ( chunk [ , throwOnExtraBits ] )
//
// Encode a partial base64 chunk.
- auto decodePartialChunk = [&](uint32_t chunk, uint32_t chunkLength,
- bool throwOnExtraBits = false) {
+ auto decodePartialChunk = [&](uint32_t chunk, uint32_t chunkLength) {
MOZ_ASSERT(chunkLength == 2 || chunkLength == 3);
- return chunkLength == 2 ? decodeChunk2(chunk, throwOnExtraBits)
- : decodeChunk3(chunk, throwOnExtraBits);
+ chunkLength == 2 ? decodeChunk2(chunk) : decodeChunk3(chunk);
};
+ // Steps 1-2. (Not applicable in our implementation.)
+
+ // Step 3.
+ if (maxLength == 0) {
+ return Base64Result::Ok(0, 0);
+ }
+
// Step 4.
//
// String index after the last fully read base64 chunk.
@@ -4722,8 +4733,7 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string,
// Current string index.
size_t index = 0;
- // Step 9.
- size_t length = linear->length();
+ // Step 9. (Passed as parameter)
const auto& decode = alphabet == Alphabet::Base64 ? Base64::Decode::Base64
: Base64::Decode::Base64Url;
@@ -4731,7 +4741,7 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string,
// Step 10.
for (; index < length; index++) {
// Step 10.c. (Reordered)
- char16_t ch = linear->latin1OrTwoByteChar(index);
+ auto ch = chars[index];
// Step 10.a.
if (mozilla::IsAsciiWhitespace(ch)) {
@@ -4753,19 +4763,14 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string,
value = decode[ch];
}
if (MOZ_UNLIKELY(value == Base64::InvalidChar)) {
- if (auto str = QuoteString(cx, ch)) {
- JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
- JSMSG_TYPED_ARRAY_BAD_BASE64_CHAR, str.get());
- }
- return false;
+ return Base64Result::ErrorAt(Base64Error::BadChar, index);
}
// Step 10.h. (Not applicable in our implementation.)
// Step 10.i.
- if (chunkLength > 1 && !sink.canAppend(chunkLength)) {
- *readLength = read;
- return true;
+ if (chunkLength > 1 && !canAppend(chunkLength)) {
+ return Base64Result::Ok(read, written());
}
// Step 10.j.
@@ -4777,9 +4782,7 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string,
// Step 10.l.
if (chunkLength == 4) {
// Step 10.l.i.
- if (!decodeChunk(chunk)) {
- return false;
- }
+ decodeChunk(chunk);
// Step 10.l.ii.
chunk = 0;
@@ -4793,9 +4796,8 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string,
read = index + 1;
// Step 10.l.v.
- if (!sink.canAppend()) {
- *readLength = read;
- return true;
+ if (!canAppend(1)) {
+ return Base64Result::Ok(read, written());
}
}
}
@@ -4806,55 +4808,45 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string,
if (chunkLength > 0) {
// Step 10.b.i.1.
if (lastChunkHandling == LastChunkHandling::StopBeforePartial) {
- *readLength = read;
- return true;
+ return Base64Result::Ok(read, written());
}
// Steps 10.b.i.2-3.
if (lastChunkHandling == LastChunkHandling::Loose) {
// Step 10.b.i.2.a.
if (chunkLength == 1) {
- JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
- JSMSG_TYPED_ARRAY_BAD_INCOMPLETE_CHUNK);
- return false;
+ return Base64Result::Error(Base64Error::IncompleteChunk);
}
MOZ_ASSERT(chunkLength == 2 || chunkLength == 3);
// Step 10.b.i.2.b.
- if (!decodePartialChunk(chunk, chunkLength)) {
- return false;
- }
+ decodePartialChunk(chunk, chunkLength);
} else {
// Step 10.b.i.3.a.
MOZ_ASSERT(lastChunkHandling == LastChunkHandling::Strict);
// Step 10.b.i.3.b.
- JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
- JSMSG_TYPED_ARRAY_BAD_INCOMPLETE_CHUNK);
- return false;
+ return Base64Result::Error(Base64Error::IncompleteChunk);
}
}
// Step 10.b.ii.
- *readLength = length;
- return true;
+ return Base64Result::Ok(length, written());
}
// Step 10.e.
MOZ_ASSERT(index < length);
- MOZ_ASSERT(linear->latin1OrTwoByteChar(index) == '=');
+ MOZ_ASSERT(chars[index] == '=');
// Step 10.e.i.
if (chunkLength < 2) {
- JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
- JSMSG_TYPED_ARRAY_BAD_INCOMPLETE_CHUNK);
- return false;
+ return Base64Result::Error(Base64Error::IncompleteChunk);
}
MOZ_ASSERT(chunkLength == 2 || chunkLength == 3);
// Step 10.e.ii. (Inlined SkipAsciiWhitespace)
while (++index < length) {
- char16_t ch = linear->latin1OrTwoByteChar(index);
+ auto ch = chars[index];
if (!mozilla::IsAsciiWhitespace(ch)) {
break;
}
@@ -4866,24 +4858,21 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string,
if (index == length) {
// Step 10.e.iii.1.a.
if (lastChunkHandling == LastChunkHandling::StopBeforePartial) {
- *readLength = read;
- return true;
+ return Base64Result::Ok(read, written());
}
// Step 10.e.iii.1.b.
- JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
- JSMSG_TYPED_ARRAY_MISSING_BASE64_PADDING);
- return false;
+ return Base64Result::Error(Base64Error::MissingPadding);
}
// Step 10.e.iii.2.
- char16_t ch = linear->latin1OrTwoByteChar(index);
+ auto ch = chars[index];
// Step 10.e.iii.3.
if (ch == '=') {
// Step 10.e.iii.3.a. (Inlined SkipAsciiWhitespace)
while (++index < length) {
- char16_t ch = linear->latin1OrTwoByteChar(index);
+ auto ch = chars[index];
if (!mozilla::IsAsciiWhitespace(ch)) {
break;
}
@@ -4893,26 +4882,79 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string,
// Step 10.e.iv.
if (index < length) {
- char16_t ch = linear->latin1OrTwoByteChar(index);
- if (auto str = QuoteString(cx, ch)) {
- JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
- JSMSG_TYPED_ARRAY_BAD_BASE64_AFTER_PADDING,
- str.get());
- }
- return false;
+ return Base64Result::ErrorAt(Base64Error::BadCharAfterPadding, index);
}
// Steps 10.e.v-vi.
- bool throwOnExtraBits = lastChunkHandling == LastChunkHandling::Strict;
+ if (lastChunkHandling == LastChunkHandling::Strict) {
+ uint32_t extraBitsMask = chunkLength == 2 ? 0xf : 0x3;
+ if ((chunk & extraBitsMask) != 0) {
+ return Base64Result::Error(Base64Error::ExtraBits);
+ }
+ }
// Step 10.e.vii.
- if (!decodePartialChunk(chunk, chunkLength, throwOnExtraBits)) {
- return false;
- }
+ decodePartialChunk(chunk, chunkLength);
// Step 10.e.viii.
- *readLength = length;
- return true;
+ return Base64Result::Ok(length, written());
+}
+
+/**
+ * FromBase64 ( string, alphabet, lastChunkHandling [ , maxLength ] )
+ * Overload dispatching on the string's character type (Latin-1 or two-byte).
+ * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64
+ */
+template <class Ops>
+static auto FromBase64(JSLinearString* string, Alphabet alphabet,
+ LastChunkHandling lastChunkHandling,
+ SharedMem<uint8_t*> data, size_t maxLength) {
+ JS::AutoCheckCannotGC nogc;  // Passed to latin1Chars/twoByteChars below.
+ if (string->hasLatin1Chars()) {
+ return FromBase64<Ops>(string->latin1Chars(nogc), string->length(),
+ alphabet, lastChunkHandling, data, maxLength);
+ }
+ return FromBase64<Ops>(string->twoByteChars(nogc), string->length(), alphabet,
+ lastChunkHandling, data, maxLength);
+}
+
+/**
+ * FromBase64 ( string, alphabet, lastChunkHandling [ , maxLength ] )
+ * Typed array target: selects SharedOps or UnsharedOps via isSharedMemory().
+ * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64
+ */
+static auto FromBase64(JSLinearString* string, Alphabet alphabet,
+ LastChunkHandling lastChunkHandling,
+ TypedArrayObject* tarray, size_t maxLength) {
+ MOZ_ASSERT(tarray->type() == Scalar::Uint8);
+
+ // The underlying buffer must neither be detached nor shrunk. (It may have
+ // been grown when it's a growable shared buffer and a concurrent thread
+ // resized the buffer.)
+ MOZ_ASSERT(!tarray->hasDetachedBuffer());
+ MOZ_ASSERT(tarray->length().valueOr(0) >= maxLength);
+
+ auto data = tarray->dataPointerEither().cast<uint8_t*>();  // Output start.
+
+ if (tarray->isSharedMemory()) {
+ return FromBase64<SharedOps>(string, alphabet, lastChunkHandling, data,
+ maxLength);
+ }
+ return FromBase64<UnsharedOps>(string, alphabet, lastChunkHandling, data,
+ maxLength);
+}
+
+/**
+ * FromBase64 ( string, alphabet, lastChunkHandling [ , maxLength ] )
+ * ByteVector target: stores at most bytes.length() bytes at bytes.begin().
+ * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64
+ */
+static auto FromBase64(JSLinearString* string, Alphabet alphabet,
+ LastChunkHandling lastChunkHandling, ByteVector& bytes) {
+ auto data = SharedMem<uint8_t*>::unshared(bytes.begin());
+ size_t maxLength = bytes.length();  // Caller sizes |bytes| before the call.
+ return FromBase64<UnsharedOps>(string, alphabet, lastChunkHandling, data,
+ maxLength);
+}
/**
@@ -5063,17 +5105,38 @@ static bool uint8array_fromBase64(JSContext* cx, unsigned argc, Value* vp) {
}
}
+ // Compute the output byte length. Four input characters are decoded into
+ // three bytes, so the output length can't be larger than ⌈length × 3/4⌉.
+ auto outLength = mozilla::CheckedInt<size_t>{string->length()};
+ outLength += 3;
+ outLength /= 4;
+ outLength *= 3;
+ MOZ_ASSERT(outLength.isValid(), "can't overflow");
+
+ static_assert(JSString::MAX_LENGTH <= TypedArrayObject::ByteLengthLimit,
+ "string length doesn't exceed maximum typed array length");
+
// Step 10.
ByteVector bytes(cx);
- ByteSink sink{bytes};
- size_t unusedReadLength;
- if (!FromBase64(cx, string, alphabet, lastChunkHandling, sink,
- &unusedReadLength)) {
+ if (!bytes.resizeUninitialized(outLength.value())) {
+ return false;
+ }
+
+ JSLinearString* linear = string->ensureLinear(cx);
+ if (!linear) {
+ return false;
+ }
+
+ auto result = FromBase64(linear, alphabet, lastChunkHandling, bytes);
+ if (MOZ_UNLIKELY(result.isError())) {
+ ReportBase64Error(cx, result, linear);
return false;
}
+ MOZ_ASSERT(result.index <= linear->length());
+ MOZ_ASSERT(result.written <= bytes.length());
// Step 11.
- size_t resultLength = bytes.length();
+ size_t resultLength = result.written;
// Step 12.
auto* tarray =
@@ -5189,16 +5252,27 @@ static bool uint8array_setFromBase64(JSContext* cx, const CallArgs& args) {
return false;
}
- // Steps 15-17.
- ByteVector bytes(cx);
- TypedArraySink sink{tarray, *length};
- size_t readLength;
- if (!FromBase64(cx, string, alphabet, lastChunkHandling, sink, &readLength)) {
- return false;
- }
+ // Steps 15-18.
+ size_t readLength = 0;
+ size_t written = 0;
+ if (*length > 0) {
+ JSLinearString* linear = string->ensureLinear(cx);
+ if (!linear) {
+ return false;
+ }
- // Step 18.
- size_t written = sink.written();
+ auto result =
+ FromBase64(linear, alphabet, lastChunkHandling, tarray, *length);
+ if (MOZ_UNLIKELY(result.isError())) {
+ ReportBase64Error(cx, result, linear);
+ return false;
+ }
+ MOZ_ASSERT(result.index <= linear->length());
+ MOZ_ASSERT(result.written <= *length);
+
+ readLength = result.index;
+ written = result.written;
+ }
// Steps 19-21. (Not applicable in our implementation.)