tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 37920fe3b2c1d4db100b775844b47af9d5112fa2
parent a5055272bbfcd18670cd384fa3510b942c082a0c
Author: André Bargull <andre.bargull@gmail.com>
Date:   Fri, 24 Oct 2025 07:59:15 +0000

Bug 1995626 - Part 1: Use a table lookup to replace HexDigitToNibbleOrInvalid. r=iain

Use a table lookup to replace `HexDigitToNibbleOrInvalid`. The decode table has
256 entries so that Latin-1 characters can be decoded branch-free. The table
element type is `int8_t`, to keep the table size small. Each element is
sign-extended to `int32_t` when loaded, so that an invalid character makes the
combined result negative and a single sign check detects the error.

The following code was generated for decoding four characters. It was extracted
from a standalone C++ implementation, but should be similar enough to the code
generated for `FromHex`:
```asm
;; Load four characters
movzx   eax, byte ptr [rdi]
movzx   ecx, byte ptr [rdi + 1]
movzx   edx, byte ptr [rdi + 2]
movzx   esi, byte ptr [rdi + 3]

;; Decode table
lea     rdi, [rip + Hex::Table]

;; Decode c2, sign-extend int8 to int32
movsx   edx, byte ptr [rdx + rdi]
shl     edx, 12

;; Decode c3, ...
movsx   esi, byte ptr [rsi + rdi]
shl     esi, 8
or      esi, edx

;; Decode c0, ...
movsx   edx, byte ptr [rax + rdi]
shl     edx, 4
or      edx, esi

;; Decode c1, ...
movsx   eax, byte ptr [rcx + rdi]
or      eax, edx

;; Check SF set by previous or-instruction
js      .invalid_char
```

Differential Revision: https://phabricator.services.mozilla.com/D269619

Diffstat:
M js/src/vm/TypedArrayObject.cpp | 81 +++++++++++++++++++++++++++++++++++++++++++------------------------------------
1 file changed, 44 insertions(+), 37 deletions(-)

diff --git a/js/src/vm/TypedArrayObject.cpp b/js/src/vm/TypedArrayObject.cpp @@ -4335,24 +4335,30 @@ static UniqueChars QuoteString(JSContext* cx, char16_t ch) { return sprinter.release(); } -// Constant for an invalid nibble. Choosen so that validation can be performed -// with just 1-2 instructions on all supported architectures. -static constexpr uint32_t InvalidNibble = -1; +namespace Hex { +static constexpr int8_t InvalidChar = -1; -template <typename Char> -static inline uint32_t HexDigitToNibbleOrInvalid(Char ch) { - if ('0' <= ch && ch <= '9') { - return ch - '0'; - } - if ('A' <= ch && ch <= 'F') { - return ch - 'A' + 10; +static constexpr auto DecodeTable() { + std::array<int8_t, 256> result = {}; + + // Initialize all elements to InvalidChar. + for (auto& e : result) { + e = InvalidChar; } - if ('a' <= ch && ch <= 'f') { - return ch - 'a' + 10; + + // Map the ASCII hexadecimal characters to their values. + for (uint8_t i = 0; i < 128; ++i) { + if (mozilla::IsAsciiHexDigit(char(i))) { + result[i] = mozilla::AsciiAlphanumericToNumber(char(i)); + } } - return InvalidNibble; + + return result; } +static constexpr auto Table = DecodeTable(); +} // namespace Hex + /** * FromHex ( string [ , maxLength ] ) * @@ -4363,6 +4369,22 @@ static size_t FromHex(const CharT* chars, size_t length, TypedArrayObject* tarray) { auto data = Ops::extract(tarray).template cast<uint8_t*>(); + static_assert(std::size(Hex::Table) == 256, + "can access decode table using Latin-1 character"); + + auto decodeChar = [&](CharT ch) -> int32_t { + if constexpr (sizeof(CharT) == 1) { + return Hex::Table[ch]; + } else { + return ch <= 255 ? Hex::Table[ch] : Hex::InvalidChar; + } + }; + + auto decode4Chars = [&](const CharT* chars) { + return (decodeChar(chars[2]) << 12) | (decodeChar(chars[3]) << 8) | + (decodeChar(chars[0]) << 4) | (decodeChar(chars[1]) << 0); + }; + // Step 4. size_t index = 0; @@ -4376,38 +4398,23 @@ static size_t FromHex(const CharT* chars, size_t length, // Step 6. 
while (index < alignedLength) { - // Step 6.a. - auto c0 = chars[index + 0]; - auto c1 = chars[index + 1]; - auto c2 = chars[index + 2]; - auto c3 = chars[index + 3]; - - // Step 6.d. - uint32_t word1 = (HexDigitToNibbleOrInvalid(c2) << 12) | - (HexDigitToNibbleOrInvalid(c3) << 8) | - (HexDigitToNibbleOrInvalid(c0) << 4) | - (HexDigitToNibbleOrInvalid(c1) << 0); + // Steps 6.a and 6.d. + uint32_t word1 = decode4Chars(chars + index); + // Step 6.b. if (MOZ_UNLIKELY(int32_t(word1) < 0)) { break; } + MOZ_ASSERT(0 <= word1 && word1 <= 0xffff); - // Step 6.a. - auto c4 = chars[index + 4]; - auto c5 = chars[index + 5]; - auto c6 = chars[index + 6]; - auto c7 = chars[index + 7]; - - // Step 6.d. - uint32_t word2 = (HexDigitToNibbleOrInvalid(c6) << 12) | - (HexDigitToNibbleOrInvalid(c7) << 8) | - (HexDigitToNibbleOrInvalid(c4) << 4) | - (HexDigitToNibbleOrInvalid(c5) << 0); + // Step 6.a and 6.d. + uint32_t word2 = decode4Chars(chars + index + 4); // Step 6.b. if (MOZ_UNLIKELY(int32_t(word2) < 0)) { break; } + MOZ_ASSERT(0 <= word2 && word2 <= 0xffff); // Step 6.c. index += 4 * 2; @@ -4431,13 +4438,13 @@ static size_t FromHex(const CharT* chars, size_t length, auto c1 = chars[index + 1]; // Step 6.d. - uint32_t byte = (HexDigitToNibbleOrInvalid(c0) << 4) | - (HexDigitToNibbleOrInvalid(c1) << 0); + uint32_t byte = (decodeChar(c0) << 4) | (decodeChar(c1) << 0); // Step 6.b. if (MOZ_UNLIKELY(int32_t(byte) < 0)) { return index; } + MOZ_ASSERT(0 <= byte && byte <= 0xff); // Step 6.c. index += 2;