tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 13d388fa5201d0b1b92b7ee2acd75243f7ad7924
parent 6a7261ea6224640f30a71ba61c3eed32d285c7c4
Author: André Bargull <andre.bargull@gmail.com>
Date:   Mon, 20 Oct 2025 12:03:53 +0000

Bug 1994067 - Part 2: Improve performance of Uint8Array.fromHex. r=spidermonkey-reviewers,iain

Changes:
- Add templates for shared and unshared memory and for Latin-1 and Two-Byte
  characters. This avoids repeatedly checking the kind of the source resp.
  output.
- Change `FromHex` to directly compute the last valid string index when called
  from `setFromHex`. That avoids the `sink.canAppend()` call from the old
  implementation.
- Replace `mozilla::IsAsciiHexDigit` and `mozilla::AsciiAlphanumericToNumber` with
  `HexDigitToNibbleOrInvalid` to perform validation and conversion in a single method.
- And add a loop to process eight characters per loop iteration to minimise the
  store instructions into the output.

For a 5KB Uint8Array, 55-60% of the time is now spend in `FromHex`, the rest is
call and allocation overhead.

Differential Revision: https://phabricator.services.mozilla.com/D269016

Diffstat:
Mjs/src/vm/TypedArrayObject.cpp | 258+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
1 file changed, 195 insertions(+), 63 deletions(-)

diff --git a/js/src/vm/TypedArrayObject.cpp b/js/src/vm/TypedArrayObject.cpp @@ -9,6 +9,7 @@ #include "mozilla/Casting.h" #include "mozilla/CheckedInt.h" +#include "mozilla/EndianUtils.h" #include "mozilla/FloatingPoint.h" #include "mozilla/IntegerTypeTraits.h" #include "mozilla/Likely.h" @@ -4382,68 +4383,183 @@ static UniqueChars QuoteString(JSContext* cx, char16_t ch) { return sprinter.release(); } +// Constant for an invalid nibble. Choosen so that validation can be performed +// with just 1-2 instructions on all supported architectures. +static constexpr uint32_t InvalidNibble = -1; + +template <typename Char> +static inline uint32_t HexDigitToNibbleOrInvalid(Char ch) { + if ('0' <= ch && ch <= '9') { + return ch - '0'; + } + if ('A' <= ch && ch <= 'F') { + return ch - 'A' + 10; + } + if ('a' <= ch && ch <= 'f') { + return ch - 'a' + 10; + } + return InvalidNibble; +} + /** * FromHex ( string [ , maxLength ] ) * * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-fromhex */ -template <class Sink> -static bool FromHex(JSContext* cx, Handle<JSString*> string, Sink& sink, - size_t* readLength) { - // Step 1. (Not applicable in our implementation.) +template <typename Ops, typename CharT> +static size_t FromHex(const CharT* chars, size_t length, + TypedArrayObject* tarray) { + auto data = Ops::extract(tarray).template cast<uint8_t*>(); - // Step 2. - size_t length = string->length(); + // Step 4. + size_t index = 0; - // Step 3. - if (length % 2 != 0) { - JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, - JSMSG_TYPED_ARRAY_BAD_HEX_STRING_LENGTH); - return false; - } + // Step 5. (Checked in caller.) + MOZ_ASSERT(length % 2 == 0); - JSLinearString* linear = string->ensureLinear(cx); - if (!linear) { - return false; - } + // Process eight characters per loop iteration. + size_t alignedLength = length & ~7; + if (index < alignedLength) { + auto data32 = data.template cast<uint32_t*>(); + + // Step 6. + while (index < alignedLength) { + // Step 6.a. + auto c0 = chars[index + 0]; + auto c1 = chars[index + 1]; + auto c2 = chars[index + 2]; + auto c3 = chars[index + 3]; + + // Step 6.d. + uint32_t word1 = (HexDigitToNibbleOrInvalid(c2) << 12) | + (HexDigitToNibbleOrInvalid(c3) << 8) | + (HexDigitToNibbleOrInvalid(c0) << 4) | + (HexDigitToNibbleOrInvalid(c1) << 0); + // Step 6.b. + if (MOZ_UNLIKELY(int32_t(word1) < 0)) { + break; + } - // Step 4. (Not applicable in our implementation.) + // Step 6.a. + auto c4 = chars[index + 4]; + auto c5 = chars[index + 5]; + auto c6 = chars[index + 6]; + auto c7 = chars[index + 7]; - // Step 5. - size_t index = 0; + // Step 6.d. + uint32_t word2 = (HexDigitToNibbleOrInvalid(c6) << 12) | + (HexDigitToNibbleOrInvalid(c7) << 8) | + (HexDigitToNibbleOrInvalid(c4) << 4) | + (HexDigitToNibbleOrInvalid(c5) << 0); + + // Step 6.b. + if (MOZ_UNLIKELY(int32_t(word2) < 0)) { + break; + } + + // Step 6.c. + index += 4 * 2; + + // Step 6.e. + // + // The word was constructed in little-endian order, so in the unlikely + // case of a big-endian system we have to swap it. + uint32_t word = + mozilla::NativeEndian::swapFromLittleEndian((word2 << 16) | word1); + Ops::store(data32++, word); + } + + data = data32.template cast<uint8_t*>(); + } // Step 6. - while (index < length && sink.canAppend()) { + while (index < length) { // Step 6.a. - char16_t c0 = linear->latin1OrTwoByteChar(index); - char16_t c1 = linear->latin1OrTwoByteChar(index + 1); + auto c0 = chars[index + 0]; + auto c1 = chars[index + 1]; + + // Step 6.d. + uint32_t byte = (HexDigitToNibbleOrInvalid(c0) << 4) | + (HexDigitToNibbleOrInvalid(c1) << 0); // Step 6.b. - if (MOZ_UNLIKELY(!mozilla::IsAsciiHexDigit(c0) || - !mozilla::IsAsciiHexDigit(c1))) { - char16_t ch = !mozilla::IsAsciiHexDigit(c0) ? c0 : c1; - if (auto str = QuoteString(cx, ch)) { - JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, - JSMSG_TYPED_ARRAY_BAD_HEX_DIGIT, str.get()); - } - return false; + if (MOZ_UNLIKELY(int32_t(byte) < 0)) { + return index; } // Step 6.c. index += 2; - // Step 6.d. - uint8_t byte = (mozilla::AsciiAlphanumericToNumber(c0) << 4) + - mozilla::AsciiAlphanumericToNumber(c1); - // Step 6.e. - if (!sink.append(byte)) { - return false; - } + Ops::store(data++, uint8_t(byte)); } // Step 7. - *readLength = index; + return index; +} + +/** + * FromHex ( string [ , maxLength ] ) + * + * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-fromhex + */ +template <typename Ops> +static size_t FromHex(JSLinearString* linear, size_t length, + TypedArrayObject* tarray) { + JS::AutoCheckCannotGC nogc; + if (linear->hasLatin1Chars()) { + return FromHex<Ops>(linear->latin1Chars(nogc), length, tarray); + } + return FromHex<Ops>(linear->twoByteChars(nogc), length, tarray); +} + +/** + * FromHex ( string [ , maxLength ] ) + * + * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-fromhex + */ +static bool FromHex(JSContext* cx, JSString* string, size_t maxLength, + TypedArrayObject* tarray) { + MOZ_ASSERT(tarray->type() == Scalar::Uint8); + + // The underlying buffer must neither be detached nor shrunk. (It may have + // been grown when it's a growable shared buffer and a concurrent thread + // resized the buffer.) + MOZ_ASSERT(!tarray->hasDetachedBuffer()); + MOZ_ASSERT(tarray->length().valueOr(0) >= maxLength); + + // Step 1. (Not applicable in our implementation.) + + // Step 2. + // + // Each byte is encoded in two characters. + size_t readLength = maxLength * 2; + MOZ_ASSERT(readLength <= string->length()); + + // Step 3. (Not applicable in our implementation.) + + JSLinearString* linear = string->ensureLinear(cx); + if (!linear) { + return false; + } + + // Steps 4 and 6-7. + size_t index; + if (tarray->isSharedMemory()) { + index = FromHex<SharedOps>(linear, readLength, tarray); + } else { + index = FromHex<UnsharedOps>(linear, readLength, tarray); + } + if (MOZ_UNLIKELY(index < readLength)) { + char16_t c0 = linear->latin1OrTwoByteChar(index); + char16_t c1 = linear->latin1OrTwoByteChar(index + 1); + char16_t ch = !mozilla::IsAsciiHexDigit(c0) ? c0 : c1; + if (auto str = QuoteString(cx, ch)) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_TYPED_ARRAY_BAD_HEX_DIGIT, str.get()); + } + return false; + } return true; } @@ -4991,28 +5107,30 @@ static bool uint8array_fromHex(JSContext* cx, unsigned argc, Value* vp) { } Rooted<JSString*> string(cx, args[0].toString()); - // Step 2. - ByteVector bytes(cx); - ByteSink sink{bytes}; - size_t unusedReadLength; - if (!FromHex(cx, string, sink, &unusedReadLength)) { + // FromHex, step 2. + size_t stringLength = string->length(); + + // FromHex, step 3. + if (stringLength % 2 != 0) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_TYPED_ARRAY_BAD_HEX_STRING_LENGTH); return false; } - // Step 3. - size_t resultLength = bytes.length(); + // Step 4. (Reordered) + size_t resultLength = stringLength / 2; - // Step 4. - auto* tarray = - TypedArrayObjectTemplate<uint8_t>::fromLength(cx, resultLength); + // Step 5. (Reordered) + Rooted<TypedArrayObject*> tarray( + cx, TypedArrayObjectTemplate<uint8_t>::fromLength(cx, resultLength)); if (!tarray) { return false; } - // Step 5. - auto target = SharedMem<uint8_t*>::unshared(tarray->dataPointerUnshared()); - auto source = SharedMem<uint8_t*>::unshared(bytes.begin()); - UnsharedOps::podCopy(target, source, resultLength); + // Steps 2-3. + if (!FromHex(cx, string, resultLength, tarray)) { + return false; + } // Step 6. args.rval().setObject(*tarray); @@ -5144,23 +5262,32 @@ static bool uint8array_setFromHex(JSContext* cx, const CallArgs& args) { Rooted<JSString*> string(cx, args[0].toString()); // Steps 4-6. - auto length = tarray->length(); - if (!length) { + auto byteLength = tarray->length(); + if (!byteLength) { ReportOutOfBounds(cx, tarray); return false; } - // Steps 7-9. - TypedArraySink sink{tarray, *length}; - size_t readLength; - if (!FromHex(cx, string, sink, &readLength)) { + // FromHex, step 2. + size_t stringLength = string->length(); + + // FromHex, step 3. + if (stringLength % 2 != 0) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, + JSMSG_TYPED_ARRAY_BAD_HEX_STRING_LENGTH); return false; } - // Step 10. - size_t written = sink.written(); + // |string| encodes |stringLength / 2| bytes, but we can't write more bytes + // than there is space in |tarray|. + size_t maxLength = std::min(*byteLength, stringLength / 2); - // Steps 11-13. (Not applicable in our implementation.) + // Steps 7-9. + if (!FromHex(cx, string, maxLength, tarray)) { + return false; + } + + // Steps 10-13. (Not applicable in our implementation.) // Step 14. Rooted<PlainObject*> result(cx, NewPlainObject(cx)); @@ -5169,13 +5296,18 @@ static bool uint8array_setFromHex(JSContext* cx, const CallArgs& args) { } // Step 15. - Rooted<Value> readValue(cx, NumberValue(readLength)); + // + // Each byte is encoded in two characters, so the number of read characters is + // exactly twice as large as |maxLength|. + Rooted<Value> readValue(cx, NumberValue(maxLength * 2)); if (!DefineDataProperty(cx, result, cx->names().read, readValue)) { return false; } // Step 16. - Rooted<Value> writtenValue(cx, NumberValue(written)); + // + // |maxLength| was constrained to the number of bytes written on success. + Rooted<Value> writtenValue(cx, NumberValue(maxLength)); if (!DefineDataProperty(cx, result, cx->names().written, writtenValue)) { return false; }