commit 13d388fa5201d0b1b92b7ee2acd75243f7ad7924
parent 6a7261ea6224640f30a71ba61c3eed32d285c7c4
Author: André Bargull <andre.bargull@gmail.com>
Date: Mon, 20 Oct 2025 12:03:53 +0000
Bug 1994067 - Part 2: Improve performance of Uint8Array.fromHex. r=spidermonkey-reviewers,iain
Changes:
- Add templates for shared and unshared memory and for Latin-1 and Two-Byte
characters. This avoids repeatedly checking the kind of the source resp.
output.
- Change `FromHex` to directly compute the last valid string index when called
from `setFromHex`. That avoids the `sink.canAppend()` call from the old
implementation.
- Replace `mozilla::IsAsciiHexDigit` and `mozilla::AsciiAlphanumericToNumber` with
`HexDigitToNibbleOrInvalid` to perform validation and conversion in a single method.
- And add a loop to process eight characters per loop iteration to minimise the
store instructions into the output.
For a 5KB Uint8Array, 55-60% of the time is now spend in `FromHex`, the rest is
call and allocation overhead.
Differential Revision: https://phabricator.services.mozilla.com/D269016
Diffstat:
1 file changed, 195 insertions(+), 63 deletions(-)
diff --git a/js/src/vm/TypedArrayObject.cpp b/js/src/vm/TypedArrayObject.cpp
@@ -9,6 +9,7 @@
#include "mozilla/Casting.h"
#include "mozilla/CheckedInt.h"
+#include "mozilla/EndianUtils.h"
#include "mozilla/FloatingPoint.h"
#include "mozilla/IntegerTypeTraits.h"
#include "mozilla/Likely.h"
@@ -4382,68 +4383,183 @@ static UniqueChars QuoteString(JSContext* cx, char16_t ch) {
return sprinter.release();
}
+// Constant for an invalid nibble. Choosen so that validation can be performed
+// with just 1-2 instructions on all supported architectures.
+static constexpr uint32_t InvalidNibble = -1;
+
+template <typename Char>
+static inline uint32_t HexDigitToNibbleOrInvalid(Char ch) {
+ if ('0' <= ch && ch <= '9') {
+ return ch - '0';
+ }
+ if ('A' <= ch && ch <= 'F') {
+ return ch - 'A' + 10;
+ }
+ if ('a' <= ch && ch <= 'f') {
+ return ch - 'a' + 10;
+ }
+ return InvalidNibble;
+}
+
/**
* FromHex ( string [ , maxLength ] )
*
* https://tc39.es/proposal-arraybuffer-base64/spec/#sec-fromhex
*/
-template <class Sink>
-static bool FromHex(JSContext* cx, Handle<JSString*> string, Sink& sink,
- size_t* readLength) {
- // Step 1. (Not applicable in our implementation.)
+template <typename Ops, typename CharT>
+static size_t FromHex(const CharT* chars, size_t length,
+ TypedArrayObject* tarray) {
+ auto data = Ops::extract(tarray).template cast<uint8_t*>();
- // Step 2.
- size_t length = string->length();
+ // Step 4.
+ size_t index = 0;
- // Step 3.
- if (length % 2 != 0) {
- JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
- JSMSG_TYPED_ARRAY_BAD_HEX_STRING_LENGTH);
- return false;
- }
+ // Step 5. (Checked in caller.)
+ MOZ_ASSERT(length % 2 == 0);
- JSLinearString* linear = string->ensureLinear(cx);
- if (!linear) {
- return false;
- }
+ // Process eight characters per loop iteration.
+ size_t alignedLength = length & ~7;
+ if (index < alignedLength) {
+ auto data32 = data.template cast<uint32_t*>();
+
+ // Step 6.
+ while (index < alignedLength) {
+ // Step 6.a.
+ auto c0 = chars[index + 0];
+ auto c1 = chars[index + 1];
+ auto c2 = chars[index + 2];
+ auto c3 = chars[index + 3];
+
+ // Step 6.d.
+ uint32_t word1 = (HexDigitToNibbleOrInvalid(c2) << 12) |
+ (HexDigitToNibbleOrInvalid(c3) << 8) |
+ (HexDigitToNibbleOrInvalid(c0) << 4) |
+ (HexDigitToNibbleOrInvalid(c1) << 0);
+ // Step 6.b.
+ if (MOZ_UNLIKELY(int32_t(word1) < 0)) {
+ break;
+ }
- // Step 4. (Not applicable in our implementation.)
+ // Step 6.a.
+ auto c4 = chars[index + 4];
+ auto c5 = chars[index + 5];
+ auto c6 = chars[index + 6];
+ auto c7 = chars[index + 7];
- // Step 5.
- size_t index = 0;
+ // Step 6.d.
+ uint32_t word2 = (HexDigitToNibbleOrInvalid(c6) << 12) |
+ (HexDigitToNibbleOrInvalid(c7) << 8) |
+ (HexDigitToNibbleOrInvalid(c4) << 4) |
+ (HexDigitToNibbleOrInvalid(c5) << 0);
+
+ // Step 6.b.
+ if (MOZ_UNLIKELY(int32_t(word2) < 0)) {
+ break;
+ }
+
+ // Step 6.c.
+ index += 4 * 2;
+
+ // Step 6.e.
+ //
+ // The word was constructed in little-endian order, so in the unlikely
+ // case of a big-endian system we have to swap it.
+ uint32_t word =
+ mozilla::NativeEndian::swapFromLittleEndian((word2 << 16) | word1);
+ Ops::store(data32++, word);
+ }
+
+ data = data32.template cast<uint8_t*>();
+ }
// Step 6.
- while (index < length && sink.canAppend()) {
+ while (index < length) {
// Step 6.a.
- char16_t c0 = linear->latin1OrTwoByteChar(index);
- char16_t c1 = linear->latin1OrTwoByteChar(index + 1);
+ auto c0 = chars[index + 0];
+ auto c1 = chars[index + 1];
+
+ // Step 6.d.
+ uint32_t byte = (HexDigitToNibbleOrInvalid(c0) << 4) |
+ (HexDigitToNibbleOrInvalid(c1) << 0);
// Step 6.b.
- if (MOZ_UNLIKELY(!mozilla::IsAsciiHexDigit(c0) ||
- !mozilla::IsAsciiHexDigit(c1))) {
- char16_t ch = !mozilla::IsAsciiHexDigit(c0) ? c0 : c1;
- if (auto str = QuoteString(cx, ch)) {
- JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
- JSMSG_TYPED_ARRAY_BAD_HEX_DIGIT, str.get());
- }
- return false;
+ if (MOZ_UNLIKELY(int32_t(byte) < 0)) {
+ return index;
}
// Step 6.c.
index += 2;
- // Step 6.d.
- uint8_t byte = (mozilla::AsciiAlphanumericToNumber(c0) << 4) +
- mozilla::AsciiAlphanumericToNumber(c1);
-
// Step 6.e.
- if (!sink.append(byte)) {
- return false;
- }
+ Ops::store(data++, uint8_t(byte));
}
// Step 7.
- *readLength = index;
+ return index;
+}
+
+/**
+ * FromHex ( string [ , maxLength ] )
+ *
+ * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-fromhex
+ */
+template <typename Ops>
+static size_t FromHex(JSLinearString* linear, size_t length,
+ TypedArrayObject* tarray) {
+ JS::AutoCheckCannotGC nogc;
+ if (linear->hasLatin1Chars()) {
+ return FromHex<Ops>(linear->latin1Chars(nogc), length, tarray);
+ }
+ return FromHex<Ops>(linear->twoByteChars(nogc), length, tarray);
+}
+
+/**
+ * FromHex ( string [ , maxLength ] )
+ *
+ * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-fromhex
+ */
+static bool FromHex(JSContext* cx, JSString* string, size_t maxLength,
+ TypedArrayObject* tarray) {
+ MOZ_ASSERT(tarray->type() == Scalar::Uint8);
+
+ // The underlying buffer must neither be detached nor shrunk. (It may have
+ // been grown when it's a growable shared buffer and a concurrent thread
+ // resized the buffer.)
+ MOZ_ASSERT(!tarray->hasDetachedBuffer());
+ MOZ_ASSERT(tarray->length().valueOr(0) >= maxLength);
+
+ // Step 1. (Not applicable in our implementation.)
+
+ // Step 2.
+ //
+ // Each byte is encoded in two characters.
+ size_t readLength = maxLength * 2;
+ MOZ_ASSERT(readLength <= string->length());
+
+ // Step 3. (Not applicable in our implementation.)
+
+ JSLinearString* linear = string->ensureLinear(cx);
+ if (!linear) {
+ return false;
+ }
+
+ // Steps 4 and 6-7.
+ size_t index;
+ if (tarray->isSharedMemory()) {
+ index = FromHex<SharedOps>(linear, readLength, tarray);
+ } else {
+ index = FromHex<UnsharedOps>(linear, readLength, tarray);
+ }
+ if (MOZ_UNLIKELY(index < readLength)) {
+ char16_t c0 = linear->latin1OrTwoByteChar(index);
+ char16_t c1 = linear->latin1OrTwoByteChar(index + 1);
+ char16_t ch = !mozilla::IsAsciiHexDigit(c0) ? c0 : c1;
+ if (auto str = QuoteString(cx, ch)) {
+ JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
+ JSMSG_TYPED_ARRAY_BAD_HEX_DIGIT, str.get());
+ }
+ return false;
+ }
return true;
}
@@ -4991,28 +5107,30 @@ static bool uint8array_fromHex(JSContext* cx, unsigned argc, Value* vp) {
}
Rooted<JSString*> string(cx, args[0].toString());
- // Step 2.
- ByteVector bytes(cx);
- ByteSink sink{bytes};
- size_t unusedReadLength;
- if (!FromHex(cx, string, sink, &unusedReadLength)) {
+ // FromHex, step 2.
+ size_t stringLength = string->length();
+
+ // FromHex, step 3.
+ if (stringLength % 2 != 0) {
+ JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
+ JSMSG_TYPED_ARRAY_BAD_HEX_STRING_LENGTH);
return false;
}
- // Step 3.
- size_t resultLength = bytes.length();
+ // Step 4. (Reordered)
+ size_t resultLength = stringLength / 2;
- // Step 4.
- auto* tarray =
- TypedArrayObjectTemplate<uint8_t>::fromLength(cx, resultLength);
+ // Step 5. (Reordered)
+ Rooted<TypedArrayObject*> tarray(
+ cx, TypedArrayObjectTemplate<uint8_t>::fromLength(cx, resultLength));
if (!tarray) {
return false;
}
- // Step 5.
- auto target = SharedMem<uint8_t*>::unshared(tarray->dataPointerUnshared());
- auto source = SharedMem<uint8_t*>::unshared(bytes.begin());
- UnsharedOps::podCopy(target, source, resultLength);
+ // Steps 2-3.
+ if (!FromHex(cx, string, resultLength, tarray)) {
+ return false;
+ }
// Step 6.
args.rval().setObject(*tarray);
@@ -5144,23 +5262,32 @@ static bool uint8array_setFromHex(JSContext* cx, const CallArgs& args) {
Rooted<JSString*> string(cx, args[0].toString());
// Steps 4-6.
- auto length = tarray->length();
- if (!length) {
+ auto byteLength = tarray->length();
+ if (!byteLength) {
ReportOutOfBounds(cx, tarray);
return false;
}
- // Steps 7-9.
- TypedArraySink sink{tarray, *length};
- size_t readLength;
- if (!FromHex(cx, string, sink, &readLength)) {
+ // FromHex, step 2.
+ size_t stringLength = string->length();
+
+ // FromHex, step 3.
+ if (stringLength % 2 != 0) {
+ JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
+ JSMSG_TYPED_ARRAY_BAD_HEX_STRING_LENGTH);
return false;
}
- // Step 10.
- size_t written = sink.written();
+ // |string| encodes |stringLength / 2| bytes, but we can't write more bytes
+ // than there is space in |tarray|.
+ size_t maxLength = std::min(*byteLength, stringLength / 2);
- // Steps 11-13. (Not applicable in our implementation.)
+ // Steps 7-9.
+ if (!FromHex(cx, string, maxLength, tarray)) {
+ return false;
+ }
+
+ // Steps 10-13. (Not applicable in our implementation.)
// Step 14.
Rooted<PlainObject*> result(cx, NewPlainObject(cx));
@@ -5169,13 +5296,18 @@ static bool uint8array_setFromHex(JSContext* cx, const CallArgs& args) {
}
// Step 15.
- Rooted<Value> readValue(cx, NumberValue(readLength));
+ //
+ // Each byte is encoded in two characters, so the number of read characters is
+ // exactly twice as large as |maxLength|.
+ Rooted<Value> readValue(cx, NumberValue(maxLength * 2));
if (!DefineDataProperty(cx, result, cx->names().read, readValue)) {
return false;
}
// Step 16.
- Rooted<Value> writtenValue(cx, NumberValue(written));
+ //
+ // |maxLength| was constrained to the number of bytes written on success.
+ Rooted<Value> writtenValue(cx, NumberValue(maxLength));
if (!DefineDataProperty(cx, result, cx->names().written, writtenValue)) {
return false;
}