commit 37920fe3b2c1d4db100b775844b47af9d5112fa2
parent a5055272bbfcd18670cd384fa3510b942c082a0c
Author: André Bargull <andre.bargull@gmail.com>
Date: Fri, 24 Oct 2025 07:59:15 +0000
Bug 1995626 - Part 1: Use a table lookup to replace HexDigitToNibbleOrInvalid. r=iain
Use a table lookup to replace `HexDigitToNibbleOrInvalid`. The decode table has
256 entries so that Latin-1 characters can be decoded branch-free. The table
element type is `int8_t`, to keep the table size small. The elements are later
loaded as `int32_t` for faster error detection.
Generated code for decoding four characters. It was extracted from a standalone
C++ implementation, but should be similar enough to the code generated for `FromHex`:
```asm
;; Load four characters
movzx eax, byte ptr [rdi]
movzx ecx, byte ptr [rdi + 1]
movzx edx, byte ptr [rdi + 2]
movzx esi, byte ptr [rdi + 3]
;; Decode table
lea rdi, [rip + Hex::Table]
;; Decode c2, sign-extend int8 to int32
movsx edx, byte ptr [rdx + rdi]
shl edx, 12
;; Decode c3, ...
movsx esi, byte ptr [rsi + rdi]
shl esi, 8
or esi, edx
;; Decode c0, ...
movsx edx, byte ptr [rax + rdi]
shl edx, 4
or edx, esi
;; Decode c1, ...
movsx eax, byte ptr [rcx + rdi]
or eax, edx
;; Check SF set by previous or-instruction
js .invalid_char
```
Differential Revision: https://phabricator.services.mozilla.com/D269619
Diffstat:
1 file changed, 44 insertions(+), 37 deletions(-)
diff --git a/js/src/vm/TypedArrayObject.cpp b/js/src/vm/TypedArrayObject.cpp
@@ -4335,24 +4335,30 @@ static UniqueChars QuoteString(JSContext* cx, char16_t ch) {
return sprinter.release();
}
-// Constant for an invalid nibble. Choosen so that validation can be performed
-// with just 1-2 instructions on all supported architectures.
-static constexpr uint32_t InvalidNibble = -1;
+namespace Hex {
+static constexpr int8_t InvalidChar = -1;
-template <typename Char>
-static inline uint32_t HexDigitToNibbleOrInvalid(Char ch) {
- if ('0' <= ch && ch <= '9') {
- return ch - '0';
- }
- if ('A' <= ch && ch <= 'F') {
- return ch - 'A' + 10;
+static constexpr auto DecodeTable() {
+ std::array<int8_t, 256> result = {};
+
+ // Initialize all elements to InvalidChar.
+ for (auto& e : result) {
+ e = InvalidChar;
}
- if ('a' <= ch && ch <= 'f') {
- return ch - 'a' + 10;
+
+ // Map the ASCII hexadecimal characters to their values.
+ for (uint8_t i = 0; i < 128; ++i) {
+ if (mozilla::IsAsciiHexDigit(char(i))) {
+ result[i] = mozilla::AsciiAlphanumericToNumber(char(i));
+ }
}
- return InvalidNibble;
+
+ return result;
}
+static constexpr auto Table = DecodeTable();
+} // namespace Hex
+
/**
* FromHex ( string [ , maxLength ] )
*
@@ -4363,6 +4369,22 @@ static size_t FromHex(const CharT* chars, size_t length,
TypedArrayObject* tarray) {
auto data = Ops::extract(tarray).template cast<uint8_t*>();
+ static_assert(std::size(Hex::Table) == 256,
+ "can access decode table using Latin-1 character");
+
+ auto decodeChar = [&](CharT ch) -> int32_t {
+ if constexpr (sizeof(CharT) == 1) {
+ return Hex::Table[ch];
+ } else {
+ return ch <= 255 ? Hex::Table[ch] : Hex::InvalidChar;
+ }
+ };
+
+ auto decode4Chars = [&](const CharT* chars) {
+ return (decodeChar(chars[2]) << 12) | (decodeChar(chars[3]) << 8) |
+ (decodeChar(chars[0]) << 4) | (decodeChar(chars[1]) << 0);
+ };
+
// Step 4.
size_t index = 0;
@@ -4376,38 +4398,23 @@ static size_t FromHex(const CharT* chars, size_t length,
// Step 6.
while (index < alignedLength) {
- // Step 6.a.
- auto c0 = chars[index + 0];
- auto c1 = chars[index + 1];
- auto c2 = chars[index + 2];
- auto c3 = chars[index + 3];
-
- // Step 6.d.
- uint32_t word1 = (HexDigitToNibbleOrInvalid(c2) << 12) |
- (HexDigitToNibbleOrInvalid(c3) << 8) |
- (HexDigitToNibbleOrInvalid(c0) << 4) |
- (HexDigitToNibbleOrInvalid(c1) << 0);
+ // Steps 6.a and 6.d.
+ uint32_t word1 = decode4Chars(chars + index);
+
// Step 6.b.
if (MOZ_UNLIKELY(int32_t(word1) < 0)) {
break;
}
+ MOZ_ASSERT(0 <= word1 && word1 <= 0xffff);
- // Step 6.a.
- auto c4 = chars[index + 4];
- auto c5 = chars[index + 5];
- auto c6 = chars[index + 6];
- auto c7 = chars[index + 7];
-
- // Step 6.d.
- uint32_t word2 = (HexDigitToNibbleOrInvalid(c6) << 12) |
- (HexDigitToNibbleOrInvalid(c7) << 8) |
- (HexDigitToNibbleOrInvalid(c4) << 4) |
- (HexDigitToNibbleOrInvalid(c5) << 0);
+ // Steps 6.a and 6.d.
+ uint32_t word2 = decode4Chars(chars + index + 4);
// Step 6.b.
if (MOZ_UNLIKELY(int32_t(word2) < 0)) {
break;
}
+ MOZ_ASSERT(0 <= word2 && word2 <= 0xffff);
// Step 6.c.
index += 4 * 2;
@@ -4431,13 +4438,13 @@ static size_t FromHex(const CharT* chars, size_t length,
auto c1 = chars[index + 1];
// Step 6.d.
- uint32_t byte = (HexDigitToNibbleOrInvalid(c0) << 4) |
- (HexDigitToNibbleOrInvalid(c1) << 0);
+ uint32_t byte = (decodeChar(c0) << 4) | (decodeChar(c1) << 0);
// Step 6.b.
if (MOZ_UNLIKELY(int32_t(byte) < 0)) {
return index;
}
+ MOZ_ASSERT(0 <= byte && byte <= 0xff);
// Step 6.c.
index += 2;