[ tor-browser ].git.dasho

commit 87c044b95f419b4d461eac12af9372e0df389307
parent 612f84975ef380bbaaf5939051f0c33d627c3318
Author: André Bargull <andre.bargull@gmail.com>
Date:   Mon, 20 Oct 2025 12:03:54 +0000

Bug 1994067 - Part 4: Improve performance of Uint8Array.fromBase64. r=spidermonkey-reviewers,iain

Changes:
- Add templates for shared and unshared memory and Latin-1 and Two-Byte strings
  like in part 3.
- Move the error reporting to the caller. This avoids hazard errors and should
  make it easier to integrate simdutf.
- Directly allocate a large enough byte vector in `fromBase64` to avoid fallible
  `append` operations.

For a 5KB Uint8Array, about 80-85% of the time is now spent in `FromBase64`. The
rest is call and allocation overhead.

Differential Revision: https://phabricator.services.mozilla.com/D269018

Diffstat:
M js/src/vm/TypedArrayObject.cpp  | 382 +++++++++++++++++++++++++++++++++++++++++++++++--------------------------------

1 file changed, 228 insertions(+), 154 deletions(-)
diff --git a/js/src/vm/TypedArrayObject.cpp b/js/src/vm/TypedArrayObject.cpp
@@ -4320,56 +4320,6 @@ TypedArrayObject* js::TypedArraySubarrayRecover(JSContext* cx,
 using ByteVector =
     js::Vector<uint8_t, FixedLengthTypedArrayObject::INLINE_BUFFER_LIMIT>;
 
-class ByteSink final {
-  ByteVector& bytes_;
-
- public:
-  explicit ByteSink(ByteVector& bytes) : bytes_(bytes) {
-    MOZ_ASSERT(bytes.empty());
-  }
-
-  constexpr bool canAppend(size_t n = 1) const { return true; }
-
-  template <typename... Args>
-  bool append(Args... args) {
-    if (!bytes_.reserve(bytes_.length() + sizeof...(args))) {
-      return false;
-    }
-    (bytes_.infallibleAppend(args), ...);
-    return true;
-  }
-};
-
-class TypedArraySink final {
-  Handle<TypedArrayObject*> typedArray_;
-  size_t maxLength_;
-  size_t index_ = 0;
-
- public:
-  TypedArraySink(Handle<TypedArrayObject*> typedArray, size_t maxLength)
-      : typedArray_(typedArray), maxLength_(maxLength) {
-    MOZ_ASSERT(typedArray->type() == Scalar::Uint8);
-
-    // The underlying buffer must neither be detached nor shrunk. (It may have
-    // been grown when it's a growable shared buffer and a concurrent thread
-    // resized the buffer.)
-    MOZ_ASSERT(!typedArray->hasDetachedBuffer());
-    MOZ_ASSERT(typedArray->length().valueOr(0) >= maxLength);
-  }
-
-  size_t written() const { return index_; }
-
-  bool canAppend(size_t n = 1) const { return maxLength_ - index_ >= n; }
-
-  template <typename... Args>
-  bool append(Args... args) {
-    MOZ_ASSERT(canAppend(sizeof...(args)));
-    (TypedArrayObjectTemplate<uint8_t>::setIndex(*typedArray_, index_++, args),
-     ...);
-    return true;
-  }
-};
-
 static UniqueChars QuoteString(JSContext* cx, char16_t ch) {
   Sprinter sprinter(cx);
   if (!sprinter.init()) {
@@ -4632,74 +4582,135 @@ enum class LastChunkHandling {
   StopBeforePartial,
 };
 
+enum class Base64Error {
+  None,
+  BadChar,
+  BadCharAfterPadding,
+  IncompleteChunk,
+  MissingPadding,
+  ExtraBits,
+};
+
+struct Base64Result {
+  Base64Error error;
+  size_t index;
+  size_t written;
+
+  bool isError() const { return error != Base64Error::None; }
+
+  static auto Ok(size_t index, size_t written) {
+    return Base64Result{Base64Error::None, index, written};
+  }
+
+  static auto Error(Base64Error error) {
+    MOZ_ASSERT(error != Base64Error::None);
+    return Base64Result{error, 0, 0};
+  }
+
+  static auto ErrorAt(Base64Error error, size_t index) {
+    MOZ_ASSERT(error != Base64Error::None);
+    return Base64Result{error, index, 0};
+  }
+};
+
+static void ReportBase64Error(JSContext* cx, Base64Result result,
+                              JSLinearString* string) {
+  MOZ_ASSERT(result.isError());
+  switch (result.error) {
+    case Base64Error::None:
+      break;
+    case Base64Error::BadChar:
+      if (auto str =
+              QuoteString(cx, string->latin1OrTwoByteChar(result.index))) {
+        JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
+                                  JSMSG_TYPED_ARRAY_BAD_BASE64_CHAR, str.get());
+      }
+      return;
+    case Base64Error::BadCharAfterPadding:
+      if (auto str =
+              QuoteString(cx, string->latin1OrTwoByteChar(result.index))) {
+        JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
+                                  JSMSG_TYPED_ARRAY_BAD_BASE64_AFTER_PADDING,
+                                  str.get());
+      }
+      return;
+    case Base64Error::IncompleteChunk:
+      JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
+                                JSMSG_TYPED_ARRAY_BAD_INCOMPLETE_CHUNK);
+      return;
+    case Base64Error::MissingPadding:
+      JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
+                                JSMSG_TYPED_ARRAY_MISSING_BASE64_PADDING);
+      return;
+    case Base64Error::ExtraBits:
+      JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
+                                JSMSG_TYPED_ARRAY_EXTRA_BASE64_BITS);
+      return;
+  }
+  MOZ_CRASH("unexpected base64 error");
+}
+
 /**
  * FromBase64 ( string, alphabet, lastChunkHandling [ , maxLength ] )
  *
  * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64
  */
-template <class Sink>
-static bool FromBase64(JSContext* cx, Handle<JSString*> string,
-                       Alphabet alphabet, LastChunkHandling lastChunkHandling,
-                       Sink& sink, size_t* readLength) {
-  // Steps 1-2. (Not applicable in our implementation.)
+template <class Ops, typename CharT>
+static auto FromBase64(const CharT* chars, size_t length, Alphabet alphabet,
+                       LastChunkHandling lastChunkHandling,
+                       SharedMem<uint8_t*> data, size_t maxLength) {
+  const SharedMem<uint8_t*> dataBegin = data;
+  const SharedMem<uint8_t*> dataEnd = data + maxLength;
 
-  // Step 3.
-  if (!sink.canAppend()) {
-    *readLength = 0;
-    return true;
-  }
+  auto canAppend = [&](size_t n) { return data + n <= dataEnd; };
 
-  JSLinearString* linear = string->ensureLinear(cx);
-  if (!linear) {
-    return false;
-  }
+  auto written = [&]() { return data.unwrap() - dataBegin.unwrap(); };
 
   // DecodeBase64Chunk ( chunk [ , throwOnExtraBits ] )
   //
   // Encode a complete base64 chunk.
   auto decodeChunk = [&](uint32_t chunk) {
     MOZ_ASSERT(chunk <= 0xffffff);
-    return sink.append(chunk >> 16, chunk >> 8, chunk);
+    MOZ_ASSERT(canAppend(3));
+    Ops::store(data++, uint8_t(chunk >> 16));
+    Ops::store(data++, uint8_t(chunk >> 8));
+    Ops::store(data++, uint8_t(chunk));
   };
 
   // DecodeBase64Chunk ( chunk [ , throwOnExtraBits ] )
   //
   // Encode a three element partial base64 chunk.
-  auto decodeChunk3 = [&](uint32_t chunk, bool throwOnExtraBits) {
+  auto decodeChunk3 = [&](uint32_t chunk) {
     MOZ_ASSERT(chunk <= 0x3ffff);
-
-    if (throwOnExtraBits && (chunk & 0x3) != 0) {
-      JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
-                                JSMSG_TYPED_ARRAY_EXTRA_BASE64_BITS);
-      return false;
-    }
-    return sink.append(chunk >> 10, chunk >> 2);
+    MOZ_ASSERT(canAppend(2));
+    Ops::store(data++, uint8_t(chunk >> 10));
+    Ops::store(data++, uint8_t(chunk >> 2));
   };
 
   // DecodeBase64Chunk ( chunk [ , throwOnExtraBits ] )
   //
   // Encode a two element partial base64 chunk.
-  auto decodeChunk2 = [&](uint32_t chunk, bool throwOnExtraBits) {
+  auto decodeChunk2 = [&](uint32_t chunk) {
     MOZ_ASSERT(chunk <= 0xfff);
-
-    if (throwOnExtraBits && (chunk & 0xf) != 0) {
-      JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
-                                JSMSG_TYPED_ARRAY_EXTRA_BASE64_BITS);
-      return false;
-    }
-    return sink.append(chunk >> 4);
+    MOZ_ASSERT(canAppend(1));
+    Ops::store(data++, uint8_t(chunk >> 4));
   };
 
   // DecodeBase64Chunk ( chunk [ , throwOnExtraBits ] )
   //
   // Encode a partial base64 chunk.
-  auto decodePartialChunk = [&](uint32_t chunk, uint32_t chunkLength,
-                                bool throwOnExtraBits = false) {
+  auto decodePartialChunk = [&](uint32_t chunk, uint32_t chunkLength) {
     MOZ_ASSERT(chunkLength == 2 || chunkLength == 3);
-    return chunkLength == 2 ? decodeChunk2(chunk, throwOnExtraBits)
-                            : decodeChunk3(chunk, throwOnExtraBits);
+    chunkLength == 2 ? decodeChunk2(chunk) : decodeChunk3(chunk);
   };
 
+  // Steps 1-2. (Not applicable in our implementation.)
+
+  // Step 3.
+  if (maxLength == 0) {
+    return Base64Result::Ok(0, 0);
+  }
+
   // Step 4.
   //
   // String index after the last fully read base64 chunk.
@@ -4722,8 +4733,7 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string,
   // Current string index.
   size_t index = 0;
 
-  // Step 9.
-  size_t length = linear->length();
+  // Step 9. (Passed as parameter)
 
   const auto& decode = alphabet == Alphabet::Base64 ? Base64::Decode::Base64
                                                     : Base64::Decode::Base64Url;
@@ -4731,7 +4741,7 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string,
   // Step 10.
   for (; index < length; index++) {
     // Step 10.c. (Reordered)
-    char16_t ch = linear->latin1OrTwoByteChar(index);
+    auto ch = chars[index];
 
     // Step 10.a.
     if (mozilla::IsAsciiWhitespace(ch)) {
@@ -4753,19 +4763,14 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string,
       value = decode[ch];
     }
     if (MOZ_UNLIKELY(value == Base64::InvalidChar)) {
-      if (auto str = QuoteString(cx, ch)) {
-        JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
-                                  JSMSG_TYPED_ARRAY_BAD_BASE64_CHAR, str.get());
-      }
-      return false;
+      return Base64Result::ErrorAt(Base64Error::BadChar, index);
     }
 
     // Step 10.h. (Not applicable in our implementation.)
 
     // Step 10.i.
-    if (chunkLength > 1 && !sink.canAppend(chunkLength)) {
-      *readLength = read;
-      return true;
+    if (chunkLength > 1 && !canAppend(chunkLength)) {
+      return Base64Result::Ok(read, written());
     }
 
     // Step 10.j.
@@ -4777,9 +4782,7 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string,
     // Step 10.l.
     if (chunkLength == 4) {
       // Step 10.l.i.
-      if (!decodeChunk(chunk)) {
-        return false;
-      }
+      decodeChunk(chunk);
 
       // Step 10.l.ii.
       chunk = 0;
@@ -4793,9 +4796,8 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string,
       read = index + 1;
 
       // Step 10.l.v.
-      if (!sink.canAppend()) {
-        *readLength = read;
-        return true;
+      if (!canAppend(1)) {
+        return Base64Result::Ok(read, written());
       }
     }
   }
@@ -4806,55 +4808,45 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string,
     if (chunkLength > 0) {
       // Step 10.b.i.1.
       if (lastChunkHandling == LastChunkHandling::StopBeforePartial) {
-        *readLength = read;
-        return true;
+        return Base64Result::Ok(read, written());
       }
 
       // Steps 10.b.i.2-3.
       if (lastChunkHandling == LastChunkHandling::Loose) {
         // Step 10.b.i.2.a.
         if (chunkLength == 1) {
-          JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
-                                    JSMSG_TYPED_ARRAY_BAD_INCOMPLETE_CHUNK);
-          return false;
+          return Base64Result::Error(Base64Error::IncompleteChunk);
         }
         MOZ_ASSERT(chunkLength == 2 || chunkLength == 3);
 
         // Step 10.b.i.2.b.
-        if (!decodePartialChunk(chunk, chunkLength)) {
-          return false;
-        }
+        decodePartialChunk(chunk, chunkLength);
       } else {
         // Step 10.b.i.3.a.
         MOZ_ASSERT(lastChunkHandling == LastChunkHandling::Strict);
 
         // Step 10.b.i.3.b.
-        JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
-                                  JSMSG_TYPED_ARRAY_BAD_INCOMPLETE_CHUNK);
-        return false;
+        return Base64Result::Error(Base64Error::IncompleteChunk);
       }
     }
 
     // Step 10.b.ii.
-    *readLength = length;
-    return true;
+    return Base64Result::Ok(length, written());
   }
 
   // Step 10.e.
   MOZ_ASSERT(index < length);
-  MOZ_ASSERT(linear->latin1OrTwoByteChar(index) == '=');
+  MOZ_ASSERT(chars[index] == '=');
 
   // Step 10.e.i.
   if (chunkLength < 2) {
-    JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
-                              JSMSG_TYPED_ARRAY_BAD_INCOMPLETE_CHUNK);
-    return false;
+    return Base64Result::Error(Base64Error::IncompleteChunk);
   }
   MOZ_ASSERT(chunkLength == 2 || chunkLength == 3);
 
   // Step 10.e.ii. (Inlined SkipAsciiWhitespace)
   while (++index < length) {
-    char16_t ch = linear->latin1OrTwoByteChar(index);
+    auto ch = chars[index];
     if (!mozilla::IsAsciiWhitespace(ch)) {
       break;
     }
@@ -4866,24 +4858,21 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string,
     if (index == length) {
       // Step 10.e.iii.1.a.
       if (lastChunkHandling == LastChunkHandling::StopBeforePartial) {
-        *readLength = read;
-        return true;
+        return Base64Result::Ok(read, written());
       }
 
       // Step 10.e.iii.1.b.
-      JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
-                                JSMSG_TYPED_ARRAY_MISSING_BASE64_PADDING);
-      return false;
+      return Base64Result::Error(Base64Error::MissingPadding);
     }
 
     // Step 10.e.iii.2.
-    char16_t ch = linear->latin1OrTwoByteChar(index);
+    auto ch = chars[index];
 
     // Step 10.e.iii.3.
     if (ch == '=') {
       // Step 10.e.iii.3.a. (Inlined SkipAsciiWhitespace)
       while (++index < length) {
-        char16_t ch = linear->latin1OrTwoByteChar(index);
+        auto ch = chars[index];
         if (!mozilla::IsAsciiWhitespace(ch)) {
           break;
         }
@@ -4893,26 +4882,79 @@ static bool FromBase64(JSContext* cx, Handle<JSString*> string,
 
   // Step 10.e.iv.
   if (index < length) {
-    char16_t ch = linear->latin1OrTwoByteChar(index);
-    if (auto str = QuoteString(cx, ch)) {
-      JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
-                                JSMSG_TYPED_ARRAY_BAD_BASE64_AFTER_PADDING,
-                                str.get());
-    }
-    return false;
+    return Base64Result::ErrorAt(Base64Error::BadCharAfterPadding, index);
   }
 
   // Steps 10.e.v-vi.
-  bool throwOnExtraBits = lastChunkHandling == LastChunkHandling::Strict;
+  if (lastChunkHandling == LastChunkHandling::Strict) {
+    uint32_t extraBitsMask = chunkLength == 2 ? 0xf : 0x3;
+    if ((chunk & extraBitsMask) != 0) {
+      return Base64Result::Error(Base64Error::ExtraBits);
+    }
+  }
 
   // Step 10.e.vii.
-  if (!decodePartialChunk(chunk, chunkLength, throwOnExtraBits)) {
-    return false;
-  }
+  decodePartialChunk(chunk, chunkLength);
 
   // Step 10.e.viii.
-  *readLength = length;
-  return true;
+  return Base64Result::Ok(length, written());
+}
+
+/**
+ * FromBase64 ( string, alphabet, lastChunkHandling [ , maxLength ] )
+ *
+ * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64
+ */
+template <class Ops>
+static auto FromBase64(JSLinearString* string, Alphabet alphabet,
+                       LastChunkHandling lastChunkHandling,
+                       SharedMem<uint8_t*> data, size_t maxLength) {
+  JS::AutoCheckCannotGC nogc;
+  if (string->hasLatin1Chars()) {
+    return FromBase64<Ops>(string->latin1Chars(nogc), string->length(),
+                           alphabet, lastChunkHandling, data, maxLength);
+  }
+  return FromBase64<Ops>(string->twoByteChars(nogc), string->length(), alphabet,
+                         lastChunkHandling, data, maxLength);
+}
+
+/**
+ * FromBase64 ( string, alphabet, lastChunkHandling [ , maxLength ] )
+ *
+ * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64
+ */
+static auto FromBase64(JSLinearString* string, Alphabet alphabet,
+                       LastChunkHandling lastChunkHandling,
+                       TypedArrayObject* tarray, size_t maxLength) {
+  MOZ_ASSERT(tarray->type() == Scalar::Uint8);
+
+  // The underlying buffer must neither be detached nor shrunk. (It may have
+  // been grown when it's a growable shared buffer and a concurrent thread
+  // resized the buffer.)
+  MOZ_ASSERT(!tarray->hasDetachedBuffer());
+  MOZ_ASSERT(tarray->length().valueOr(0) >= maxLength);
+
+  auto data = tarray->dataPointerEither().cast<uint8_t*>();
+
+  if (tarray->isSharedMemory()) {
+    return FromBase64<SharedOps>(string, alphabet, lastChunkHandling, data,
+                                 maxLength);
+  }
+  return FromBase64<UnsharedOps>(string, alphabet, lastChunkHandling, data,
+                                 maxLength);
+}
+
+/**
+ * FromBase64 ( string, alphabet, lastChunkHandling [ , maxLength ] )
+ *
+ * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64
+ */
+static auto FromBase64(JSLinearString* string, Alphabet alphabet,
+                       LastChunkHandling lastChunkHandling, ByteVector& bytes) {
+  auto data = SharedMem<uint8_t*>::unshared(bytes.begin());
+  size_t maxLength = bytes.length();
+  return FromBase64<UnsharedOps>(string, alphabet, lastChunkHandling, data,
+                                 maxLength);
 }
 
 /**
@@ -5063,17 +5105,38 @@ static bool uint8array_fromBase64(JSContext* cx, unsigned argc, Value* vp) {
     }
   }
 
+  // Compute the output byte length. Four input characters are decoded into
+  // three bytes, so the output length can't be larger than ⌈length × 3/4⌉.
+  auto outLength = mozilla::CheckedInt<size_t>{string->length()};
+  outLength += 3;
+  outLength /= 4;
+  outLength *= 3;
+  MOZ_ASSERT(outLength.isValid(), "can't overflow");
+
+  static_assert(JSString::MAX_LENGTH <= TypedArrayObject::ByteLengthLimit,
+                "string length doesn't exceed maximum typed array length");
+
   // Step 10.
   ByteVector bytes(cx);
-  ByteSink sink{bytes};
-  size_t unusedReadLength;
-  if (!FromBase64(cx, string, alphabet, lastChunkHandling, sink,
-                  &unusedReadLength)) {
+  if (!bytes.resizeUninitialized(outLength.value())) {
+    return false;
+  }
+
+  JSLinearString* linear = string->ensureLinear(cx);
+  if (!linear) {
+    return false;
+  }
+
+  auto result = FromBase64(linear, alphabet, lastChunkHandling, bytes);
+  if (MOZ_UNLIKELY(result.isError())) {
+    ReportBase64Error(cx, result, linear);
     return false;
   }
+  MOZ_ASSERT(result.index <= linear->length());
+  MOZ_ASSERT(result.written <= bytes.length());
 
   // Step 11.
-  size_t resultLength = bytes.length();
+  size_t resultLength = result.written;
 
   // Step 12.
   auto* tarray =
@@ -5189,16 +5252,27 @@ static bool uint8array_setFromBase64(JSContext* cx, const CallArgs& args) {
     return false;
   }
 
-  // Steps 15-17.
-  ByteVector bytes(cx);
-  TypedArraySink sink{tarray, *length};
-  size_t readLength;
-  if (!FromBase64(cx, string, alphabet, lastChunkHandling, sink, &readLength)) {
-    return false;
-  }
+  // Steps 15-18.
+  size_t readLength = 0;
+  size_t written = 0;
+  if (*length > 0) {
+    JSLinearString* linear = string->ensureLinear(cx);
+    if (!linear) {
+      return false;
+    }
 
-  // Step 18.
-  size_t written = sink.written();
+    auto result =
+        FromBase64(linear, alphabet, lastChunkHandling, tarray, *length);
+    if (MOZ_UNLIKELY(result.isError())) {
+      ReportBase64Error(cx, result, linear);
+      return false;
+    }
+    MOZ_ASSERT(result.index <= linear->length());
+    MOZ_ASSERT(result.written <= *length);
+
+    readLength = result.index;
+    written = result.written;
+  }
 
   // Steps 19-21. (Not applicable in our implementation.)

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE