[ tor-browser ].git.dasho

commit 37920fe3b2c1d4db100b775844b47af9d5112fa2
parent a5055272bbfcd18670cd384fa3510b942c082a0c
Author: André Bargull <andre.bargull@gmail.com>
Date:   Fri, 24 Oct 2025 07:59:15 +0000

Bug 1995626 - Part 1: Use a table lookup to replace HexDigitToNibbleOrInvalid. r=iain

Use a table lookup to replace `HexDigitToNibbleOrInvalid`. The decode table has
256 entries so that Latin-1 characters can be decoded branch-free. The table
element type is `int8_t`, to keep the table size small. The elements are later
loaded as `int32_t` for faster error detection.

Generated code for decoding four characters, extracted from a standalone C++
implementation, but should be similar enough to code generated for `FromHex`:
```asm
;; Load four characters
movzx   eax, byte ptr [rdi]
movzx   ecx, byte ptr [rdi + 1]
movzx   edx, byte ptr [rdi + 2]
movzx   esi, byte ptr [rdi + 3]

;; Decode table
lea     rdi, [rip + Hex::Table]

;; Decode c2, sign-extend int8 to int32
movsx   edx, byte ptr [rdx + rdi]
shl     edx, 12

;; Decode c3, ...
movsx   esi, byte ptr [rsi + rdi]
shl     esi, 8
or      esi, edx

;; Decode c0, ...
movsx   edx, byte ptr [rax + rdi]
shl     edx, 4
or      edx, esi

;; Decode c1, ...
movsx   eax, byte ptr [rcx + rdi]
or      eax, edx

;; Check SF set by previous or-instruction
js      .invalid_char
```

Differential Revision: https://phabricator.services.mozilla.com/D269619

Diffstat:
M js/src/vm/TypedArrayObject.cpp  | 81 +++++++++++++++++++++++++++++++++++++++++++------------------------------------

1 file changed, 44 insertions(+), 37 deletions(-)
diff --git a/js/src/vm/TypedArrayObject.cpp b/js/src/vm/TypedArrayObject.cpp
@@ -4335,24 +4335,30 @@ static UniqueChars QuoteString(JSContext* cx, char16_t ch) {
   return sprinter.release();
 }
 
-// Constant for an invalid nibble. Choosen so that validation can be performed
-// with just 1-2 instructions on all supported architectures.
-static constexpr uint32_t InvalidNibble = -1;
+namespace Hex {
+static constexpr int8_t InvalidChar = -1;
 
-template <typename Char>
-static inline uint32_t HexDigitToNibbleOrInvalid(Char ch) {
-  if ('0' <= ch && ch <= '9') {
-    return ch - '0';
-  }
-  if ('A' <= ch && ch <= 'F') {
-    return ch - 'A' + 10;
+static constexpr auto DecodeTable() {
+  std::array<int8_t, 256> result = {};
+
+  // Initialize all elements to InvalidChar.
+  for (auto& e : result) {
+    e = InvalidChar;
   }
-  if ('a' <= ch && ch <= 'f') {
-    return ch - 'a' + 10;
+
+  // Map the ASCII hexadecimal characters to their values.
+  for (uint8_t i = 0; i < 128; ++i) {
+    if (mozilla::IsAsciiHexDigit(char(i))) {
+      result[i] = mozilla::AsciiAlphanumericToNumber(char(i));
+    }
   }
-  return InvalidNibble;
+
+  return result;
 }
 
+static constexpr auto Table = DecodeTable();
+}  // namespace Hex
+
 /**
  * FromHex ( string [ , maxLength ] )
  *
@@ -4363,6 +4369,22 @@ static size_t FromHex(const CharT* chars, size_t length,
                       TypedArrayObject* tarray) {
   auto data = Ops::extract(tarray).template cast<uint8_t*>();
 
+  static_assert(std::size(Hex::Table) == 256,
+                "can access decode table using Latin-1 character");
+
+  auto decodeChar = [&](CharT ch) -> int32_t {
+    if constexpr (sizeof(CharT) == 1) {
+      return Hex::Table[ch];
+    } else {
+      return ch <= 255 ? Hex::Table[ch] : Hex::InvalidChar;
+    }
+  };
+
+  auto decode4Chars = [&](const CharT* chars) {
+    return (decodeChar(chars[2]) << 12) | (decodeChar(chars[3]) << 8) |
+           (decodeChar(chars[0]) << 4) | (decodeChar(chars[1]) << 0);
+  };
+
   // Step 4.
   size_t index = 0;
 
@@ -4376,38 +4398,23 @@ static size_t FromHex(const CharT* chars, size_t length,
 
     // Step 6.
     while (index < alignedLength) {
-      // Step 6.a.
-      auto c0 = chars[index + 0];
-      auto c1 = chars[index + 1];
-      auto c2 = chars[index + 2];
-      auto c3 = chars[index + 3];
-
-      // Step 6.d.
-      uint32_t word1 = (HexDigitToNibbleOrInvalid(c2) << 12) |
-                       (HexDigitToNibbleOrInvalid(c3) << 8) |
-                       (HexDigitToNibbleOrInvalid(c0) << 4) |
-                       (HexDigitToNibbleOrInvalid(c1) << 0);
+      // Steps 6.a and 6.d.
+      uint32_t word1 = decode4Chars(chars + index);
+
       // Step 6.b.
       if (MOZ_UNLIKELY(int32_t(word1) < 0)) {
         break;
       }
+      MOZ_ASSERT(0 <= word1 && word1 <= 0xffff);
 
-      // Step 6.a.
-      auto c4 = chars[index + 4];
-      auto c5 = chars[index + 5];
-      auto c6 = chars[index + 6];
-      auto c7 = chars[index + 7];
-
-      // Step 6.d.
-      uint32_t word2 = (HexDigitToNibbleOrInvalid(c6) << 12) |
-                       (HexDigitToNibbleOrInvalid(c7) << 8) |
-                       (HexDigitToNibbleOrInvalid(c4) << 4) |
-                       (HexDigitToNibbleOrInvalid(c5) << 0);
+      // Step 6.a and 6.d.
+      uint32_t word2 = decode4Chars(chars + index + 4);
 
       // Step 6.b.
       if (MOZ_UNLIKELY(int32_t(word2) < 0)) {
         break;
       }
+      MOZ_ASSERT(0 <= word2 && word2 <= 0xffff);
 
       // Step 6.c.
       index += 4 * 2;
@@ -4431,13 +4438,13 @@ static size_t FromHex(const CharT* chars, size_t length,
     auto c1 = chars[index + 1];
 
     // Step 6.d.
-    uint32_t byte = (HexDigitToNibbleOrInvalid(c0) << 4) |
-                    (HexDigitToNibbleOrInvalid(c1) << 0);
+    uint32_t byte = (decodeChar(c0) << 4) | (decodeChar(c1) << 0);
 
     // Step 6.b.
     if (MOZ_UNLIKELY(int32_t(byte) < 0)) {
       return index;
     }
+    MOZ_ASSERT(0 <= byte && byte <= 0xff);
 
     // Step 6.c.
     index += 2;

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE