[ tor-browser ].git.dasho

commit 0eb57901407a79db6d37a2624afe1d7f49bbf571
parent 37920fe3b2c1d4db100b775844b47af9d5112fa2
Author: André Bargull <andre.bargull@gmail.com>
Date:   Fri, 24 Oct 2025 07:59:15 +0000

Bug 1995626 - Part 2: Add separate loop to decode full chunks in FromBase64. r=iain

Two changes:
1. Extend the decode table to 256 elements and change the element type to
   `int8_t`. This matches the changes from part 1.
2. Add a separate loop to process full chunks. This saves additional branches,
   because we no longer have to if the output is full for each character read.
   Also try to read four consecutive characters if possible and treat whitespace
   characters in a slow path.

Differential Revision: https://phabricator.services.mozilla.com/D269620

Diffstat:
M js/src/vm/TypedArrayObject.cpp  | 162 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------

1 file changed, 119 insertions(+), 43 deletions(-)
diff --git a/js/src/vm/TypedArrayObject.cpp b/js/src/vm/TypedArrayObject.cpp
@@ -4523,10 +4523,10 @@ static bool FromHex(JSContext* cx, JSString* string, size_t maxLength,
 }
 
 namespace Base64 {
-static constexpr uint8_t InvalidChar = UINT8_MAX;
+static constexpr int8_t InvalidChar = -1;
 
 static constexpr auto DecodeTable(const char (&alphabet)[65]) {
-  std::array<uint8_t, 128> result = {};
+  std::array<int8_t, 256> result = {};
 
   // Initialize all elements to InvalidChar.
   for (auto& e : result) {
@@ -4554,12 +4554,12 @@ static_assert(std::char_traits<char>::length(Base64Url) == 64);
 
 namespace Base64::Decode {
 static constexpr auto Base64 = DecodeTable(Base64::Encode::Base64);
-static_assert(Base64.size() == 128,
-              "128 elements to allow access through ASCII characters");
+static_assert(Base64.size() == 256,
+              "256 elements to allow access through Latin-1 characters");
 
 static constexpr auto Base64Url = DecodeTable(Base64::Encode::Base64Url);
-static_assert(Base64Url.size() == 128,
-              "128 elements to allow access through ASCII characters");
+static_assert(Base64Url.size() == 256,
+              "256 elements to allow access through Latin-1 characters");
 }  // namespace Base64::Decode
 
 enum class Alphabet {
@@ -4719,11 +4719,118 @@ static auto FromBase64(const CharT* chars, size_t length, Alphabet alphabet,
   if (maxLength == 0) {
     return Base64Result::Ok(0, 0);
   }
+  MOZ_ASSERT(canAppend(1), "can append at least one byte if maxLength > 0");
+
+  // Step 8.
+  //
+  // Current string index.
+  size_t index = 0;
+
+  // Step 9. (Passed as parameter)
+
+  static_assert(std::size(Base64::Decode::Base64) == 256 &&
+                    std::size(Base64::Decode::Base64Url) == 256,
+                "can access decode tables using Latin-1 character");
+
+  const auto& decode = alphabet == Alphabet::Base64 ? Base64::Decode::Base64
+                                                    : Base64::Decode::Base64Url;
+
+  auto decodeChar = [&](CharT ch) -> int32_t {
+    if constexpr (sizeof(CharT) == 1) {
+      return decode[ch];
+    } else {
+      return ch <= 255 ? decode[ch] : Base64::InvalidChar;
+    }
+  };
+
+  auto decode4Chars = [&](const CharT* chars) {
+    return (decodeChar(chars[0]) << 18) | (decodeChar(chars[1]) << 12) |
+           (decodeChar(chars[2]) << 6) | (decodeChar(chars[3]));
+  };
+
+  // Initial loop to process only full chunks. Doesn't perform any error
+  // reporting and expects that at least four characters can be read per loop
+  // iteration and that the output has enough space for a decoded chunk.
+
+  size_t alignedLength = length & ~0x3;
+  while (canAppend(3) && index < alignedLength) {
+    // Fast path: Read four consecutive characters.
+
+    // Step 10.a. (Performed in slow path.)
+
+    // Step 10.b. (Moved out of loop.)
+
+    // Steps 10.c and 10.e-g.
+    uint32_t chunk = decode4Chars(chars + index);
+
+    // Steps 10.h-i. (Not applicable in this loop.)
+
+    // Steps 10.d and 10.j-l.
+    if (MOZ_LIKELY(int32_t(chunk) >= 0)) {
+      // Step 10.j-l.
+      decodeChunk(chunk);
+
+      // Step 10.d.
+      index += 4;
+      continue;
+    }
+
+    // Slow path: Read four characters, ignoring whitespace.
+
+    // Steps 10.a and 10.b.
+    CharT part[4];
+    size_t i = index;
+    size_t j = 0;
+    while (i < length && j < 4) {
+      auto ch = chars[i++];
+
+      // Step 10.a.
+      if (mozilla::IsAsciiWhitespace(ch)) {
+        continue;
+      }
+
+      // Step 10.c.
+      part[j++] = ch;
+    }
+
+    // Steps 10.d-l.
+    if (MOZ_LIKELY(j == 4)) {
+      // Steps 10.e-g.
+      uint32_t chunk = decode4Chars(part);
+
+      // Steps 10.h-i. (Not applicable in this loop.)
+
+      // Steps 10.d and 10.j-l.
+      if (MOZ_LIKELY(int32_t(chunk) >= 0)) {
+        // Step 10.j-l.
+        decodeChunk(chunk);
+
+        // Step 10.d.
+        index = i;
+        continue;
+      }
+    }
+
+    // Padding or invalid characters, or end of input. The next loop will
+    // process any characters left in the input.
+    break;
+  }
+
+  // Step 10.b.ii.
+  if (index == length) {
+    return Base64Result::Ok(length, written());
+  }
 
   // Step 4.
   //
   // String index after the last fully read base64 chunk.
-  size_t read = 0;
+  size_t read = index;
+
+  // Step 10.l.v. (Reordered)
+  if (!canAppend(1)) {
+    MOZ_ASSERT(written() > 0);
+    return Base64Result::Ok(read, written());
+  }
 
   // Step 5. (Not applicable in our implementation.)
 
@@ -4737,16 +4844,6 @@ static auto FromBase64(const CharT* chars, size_t length, Alphabet alphabet,
   // Current base64 chunk length, in the range [0..4].
   size_t chunkLength = 0;
 
-  // Step 8.
-  //
-  // Current string index.
-  size_t index = 0;
-
-  // Step 9. (Passed as parameter)
-
-  const auto& decode = alphabet == Alphabet::Base64 ? Base64::Decode::Base64
-                                                    : Base64::Decode::Base64Url;
-
   // Step 10.
   for (; index < length; index++) {
     // Step 10.c. (Reordered)
@@ -4767,13 +4864,11 @@ static auto FromBase64(const CharT* chars, size_t length, Alphabet alphabet,
     }
 
     // Steps 10.f-g.
-    uint8_t value = Base64::InvalidChar;
-    if (mozilla::IsAscii(ch)) {
-      value = decode[ch];
-    }
-    if (MOZ_UNLIKELY(value == Base64::InvalidChar)) {
+    uint32_t value = decodeChar(ch);
+    if (MOZ_UNLIKELY(int32_t(value) < 0)) {
       return Base64Result::ErrorAt(Base64Error::BadChar, index);
     }
+    MOZ_ASSERT(0 <= value && value <= 0x7f);
 
     // Step 10.h. (Not applicable in our implementation.)
 
@@ -4788,27 +4883,8 @@ static auto FromBase64(const CharT* chars, size_t length, Alphabet alphabet,
     // Step 10.k.
     chunkLength += 1;
 
-    // Step 10.l.
-    if (chunkLength == 4) {
-      // Step 10.l.i.
-      decodeChunk(chunk);
-
-      // Step 10.l.ii.
-      chunk = 0;
-
-      // Step 10.l.iii.
-      chunkLength = 0;
-
-      // Step 10.l.iv.
-      //
-      // NB: Add +1 to include the |index| update from step 10.d.
-      read = index + 1;
-
-      // Step 10.l.v.
-      if (!canAppend(1)) {
-        return Base64Result::Ok(read, written());
-      }
-    }
+    // Step 10.l. (Full chunks are processed in the initial loop.)
+    MOZ_ASSERT(chunkLength < 4);
   }
 
   // Step 10.b.

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE