tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

regexp-bytecodes-inl.h (9585B)


      1 // Copyright 2025 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef V8_REGEXP_REGEXP_BYTECODES_INL_H_
      6 #define V8_REGEXP_REGEXP_BYTECODES_INL_H_
      7 
      8 #include "irregexp/imported/regexp-bytecodes.h"
      9 // Include the non-inl header before the rest of the headers.
     10 
     11 #include <type_traits>
     12 
     13 namespace v8 {
     14 namespace internal {
     15 
     // Maps a RegExpBytecodeOperandType to its compile-time properties (kSize,
     // kIsBasic and, for basic types, kCType). Only the primary template is
     // declared here; the DECLARE_*_OPERAND_TYPE_TRAITS macros below provide one
     // explicit specialization per operand type.
     16 template <RegExpBytecodeOperandType>
     17 struct RegExpOperandTypeTraits;
     18 
     // Generates one RegExpOperandTypeTraits specialization for every
     // (Name, CType) entry of BASIC_BYTECODE_OPERAND_TYPE_LIST. Each specialization
     // exposes:
     //   kSize    - sizeof(CType), in bytes.
     //   kCType   - the C++ type used to read the operand.
     //   kIsBasic - true, marking the operand as directly readable.
     // Pointer types are rejected at compile time by the static_assert.
     // The helper macro is undefined under its own name right after use so that it
     // does not leak into files including this header.
     19 #define DECLARE_BASIC_OPERAND_TYPE_TRAITS(Name, CType)                 \
     20  template <>                                                          \
     21  struct RegExpOperandTypeTraits<RegExpBytecodeOperandType::k##Name> { \
     22    static_assert(!std::is_pointer_v<CType>);                          \
     23    static constexpr uint8_t kSize = sizeof(CType);                    \
     24    using kCType = CType;                                              \
     25    static constexpr bool kIsBasic = true;                             \
     26  };
     27 BASIC_BYTECODE_OPERAND_TYPE_LIST(DECLARE_BASIC_OPERAND_TYPE_TRAITS)
     28 #undef DECLARE_BASIC_OPERAND_TYPE_TRAITS
     29 
     // Generates one RegExpOperandTypeTraits specialization for every
     // (Name, Size) entry of SPECIAL_BYTECODE_OPERAND_TYPE_LIST. Special operands
     // have an explicitly listed byte size and no associated C++ type, so only
     // kSize and kIsBasic (= false) are provided.
     // The helper macro is undefined under its own name right after use so that it
     // does not leak into files including this header.
     30 #define DECLARE_SPECIAL_OPERAND_TYPE_TRAITS(Name, Size)                \
     31  template <>                                                          \
     32  struct RegExpOperandTypeTraits<RegExpBytecodeOperandType::k##Name> { \
     33    static constexpr uint8_t kSize = Size;                             \
     34    static constexpr bool kIsBasic = false;                            \
     35  };
     36 SPECIAL_BYTECODE_OPERAND_TYPE_LIST(DECLARE_SPECIAL_OPERAND_TYPE_TRAITS)
     37 #undef DECLARE_SPECIAL_OPERAND_TYPE_TRAITS
     38 
     39 namespace detail {
     40 
     41 // Bytecode is 4-byte aligned.
     42 // We can pack operands if multiple operands fit into 4 bytes.
        // The same constant is used to round the total size of each bytecode
        // (including its operands) up to a multiple of 4 bytes.
     43 static constexpr int kBytecodeAlignment = 4;
     44 
     45 // Calculates packed offsets for each Bytecode operand.
     46 // The first operand can be packed together with the bytecode at an unaligned
     47 // offset 1. All other operands are aligned to their own size if
     48 // they are "basic" types.
        //
        // Returns a std::array<int, N> with one entry per operand, in template
        // argument order. Each entry is the operand's byte offset counted from the
        // start of the bytecode; the first candidate offset is
        // sizeof(RegExpBytecode), i.e. directly behind the bytecode itself.
        // The function is consteval, so it is only ever evaluated at compile time.
     49 template <RegExpBytecodeOperandType... operand_types>
     50 consteval auto CalculatePackedOffsets() {
     51  constexpr int N = sizeof...(operand_types);
     52  constexpr std::array<uint8_t, N> kOperandSizes = {
     53      RegExpOperandTypeTraits<operand_types>::kSize...};
     54  constexpr std::array<bool, N> kIsBasic = {
     55      RegExpOperandTypeTraits<operand_types>::kIsBasic...};
     56 
     57  std::array<int, N> offsets{};
         // Offset directly behind the bytecode; only an operand placed exactly here
         // may be unaligned.
     58  int first_offset = sizeof(RegExpBytecode);
     59  int offset = first_offset;
     60 
         // NOTE(review): `i` is size_t while N is int, so the comparison mixes
         // signedness; N is never negative here.
     61  for (size_t i = 0; i < N; ++i) {
     62    uint8_t operand_size = kOperandSizes[i];
     63 
     64    // An operand is only allowed to be unaligned if it's packed with the
     65    // bytecode. All subsequent basic operands must be aligned to their own
     66    // size.
     67    if (offset > first_offset && kIsBasic[i]) {
     68      offset = RoundUp(offset, operand_size);
     69    }
     70 
     71    // If the operand doesn't fit into the current 4-byte block, start a new
     72    // 4-byte block.
           // This check also applies to non-basic operands, which skip the
           // alignment step above.
     73    if ((offset % kBytecodeAlignment) + operand_size > kBytecodeAlignment) {
     74      offset = RoundUp<kBytecodeAlignment>(offset);
     75    }
     76 
     77    offsets[i] = offset;
     78    offset += operand_size;
     79  }
     80 
     81  return offsets;
     82 }
     83 
     // Compile-time layout description for a bytecode whose operands have the
     // types `ops...` (in order). All members are constants derived from the
     // per-type RegExpOperandTypeTraits and from CalculatePackedOffsets.
     84 template <RegExpBytecodeOperandType... ops>
     85 struct RegExpBytecodeOperandsTraits {
         // Number of operands.
     86  static constexpr int kOperandCount = sizeof...(ops);
         // Operand types, in declaration order.
     87  static constexpr std::array<RegExpBytecodeOperandType, kOperandCount>
     88      kOperandTypes = {ops...};
         // Size in bytes of each operand, in declaration order.
     89  static constexpr std::array<uint8_t, kOperandCount> kOperandSizes = {
     90      RegExpOperandTypeTraits<ops>::kSize...};
         // Byte offset of each operand from the start of the bytecode.
     91  static constexpr std::array<int, kOperandCount> kOperandOffsets =
     92      CalculatePackedOffsets<ops...>();
         // Total size of the bytecode including operands, rounded up to
         // kBytecodeAlignment. Without operands this is just the rounded-up size of
         // the bytecode itself; otherwise it is the end of the last operand.
     93  static constexpr int kSize = RoundUp<kBytecodeAlignment>(
     94      kOperandCount == 0 ? sizeof(RegExpBytecode)
     95                         : kOperandOffsets.back() + kOperandSizes.back());
     96 };
     97 
     // Provides, per bytecode, a scoped enum `Operand` naming that bytecode's
     // operands. Only declared here; the specializations are generated below from
     // REGEXP_BYTECODE_LIST.
     98 template <RegExpBytecode bc>
     99 struct RegExpBytecodeOperandNames;
    100 
        // For each REGEXP_BYTECODE_LIST entry, defines the specialization for
        // RegExpBytecode::k<CamelName> with an `Operand` enum built from OpNames, and
        // re-exports the enumerators into the struct scope via `using enum`.
        // NOTE(review): UNPAREN is defined elsewhere; presumably it strips the
        // parentheses around the OpNames list - confirm.
    101 #define DECLARE_OPERAND_NAMES(CamelName, SnakeName, OpNames, OpTypes) \
    102  template <>                                                         \
    103  struct RegExpBytecodeOperandNames<RegExpBytecode::k##CamelName> {   \
    104    enum class Operand { UNPAREN(OpNames) };                          \
    105    using enum Operand;                                               \
    106  };
    107 REGEXP_BYTECODE_LIST(DECLARE_OPERAND_NAMES)
    108 #undef DECLARE_OPERAND_NAMES
    109 
    // Base class of the RegExpBytecodeOperands<bc> specializations. It offers
    // compile-time metadata (count, sizes, offsets, types) for the operands of
    // bytecode `bc` and typed readers that fetch an operand from the bytes at
    // `pc`. Operand enumerator values are used as indices into OpTypes, so the
    // operand names and OpTypes are expected to be listed in the same order.
    110 template <RegExpBytecode bc, RegExpBytecodeOperandType... OpTypes>
    111 class RegExpBytecodeOperandsBase {
    112 public:
    113  using Operand = RegExpBytecodeOperandNames<bc>::Operand;
    114  using Traits = RegExpBytecodeOperandsTraits<OpTypes...>;
         // Number of operands of `bc`.
    115  static constexpr int kCount = Traits::kOperandCount;
         // Size in bytes of `bc` including all operands (see Traits::kSize).
    116  static constexpr int kTotalSize = Traits::kSize;
         // Index of `op` into the Traits arrays; this is the enumerator's value.
    117  static consteval int Index(Operand op) { return static_cast<uint8_t>(op); }
         // Size in bytes of operand `op`.
    118  static consteval int Size(Operand op) {
    119    return Traits::kOperandSizes[Index(op)];
    120  }
         // Byte offset of operand `op` from the start of the bytecode.
    121  static consteval int Offset(Operand op) {
    122    return Traits::kOperandOffsets[Index(op)];
    123  }
         // Operand type of `op`.
    124  static consteval RegExpBytecodeOperandType Type(Operand op) {
    125    return Traits::kOperandTypes[Index(op)];
    126  }
    127 
    128 private:
         // Reads a basic operand stored at an offset that is a multiple of its own
         // size by dereferencing `pc + offset` as the operand's C type. Debug checks
         // verify that `pc` points at bytecode `bc` and that `offset` is aligned.
    129  template <RegExpBytecodeOperandType OperandType>
    130    requires(RegExpOperandTypeTraits<OperandType>::kIsBasic)
    131  static auto GetAligned(const uint8_t* pc, int offset) {
    132    DCHECK_EQ(*pc, RegExpBytecodes::ToByte(bc));
    133    using CType = RegExpOperandTypeTraits<OperandType>::kCType;
    134    DCHECK(IsAligned(offset, sizeof(CType)));
    135    return *reinterpret_cast<const CType*>(pc + offset);
    136  }
    137 
         // Reads a 2-byte basic operand that sits at offset 1, i.e. in the same
         // 32-bit word as the bytecode: the word at `pc` is loaded as int32_t,
         // shifted right by BYTECODE_SHIFT and converted to the operand's C type.
         // NOTE(review): BYTECODE_SHIFT is defined elsewhere; presumably the shift
         // discards the bits holding the bytecode itself - confirm.
    138  // TODO(pthier): We can remove unaligned packing once we have fully switched
    139  // to the new bytecode layout. This is for backwards-compatibility with the
    140  // old layout only.
    141  template <RegExpBytecodeOperandType OperandType>
    142    requires(RegExpOperandTypeTraits<OperandType>::kIsBasic)
    143  static auto GetPacked(const uint8_t* pc, int offset) {
    144    DCHECK_EQ(*pc, RegExpBytecodes::ToByte(bc));
    145    // Only unaligned packing of 2-byte values with the bytecode is supported.
    146    DCHECK_EQ(offset, 1);
    147    static_assert(RegExpOperandTypeTraits<OperandType>::kSize == 2);
    148    using CType = RegExpOperandTypeTraits<OperandType>::kCType;
    149    DCHECK(!IsAligned(offset, sizeof(CType)));
    150    int32_t packed_value = *reinterpret_cast<const int32_t*>(pc);
    151    return static_cast<CType>(packed_value >> BYTECODE_SHIFT);
    152  }
    153 
    154 public:
         // Returns the value of basic operand `op` of the bytecode at `pc`. The
         // operand's offset is a compile-time constant, so the packed or aligned
         // reader is selected with `if constexpr`.
    155  template <Operand op>
    156    requires(RegExpOperandTypeTraits<Type(op)>::kIsBasic)
    157  static auto Get(const uint8_t* pc) {
    158    constexpr RegExpBytecodeOperandType OperandType = Type(op);
    159    constexpr int offset = Offset(op);
    160    using CType = RegExpOperandTypeTraits<OperandType>::kCType;
    161    // TODO(pthier): We can remove unaligned packing once we have fully switched
    162    // to the new bytecode layout. This is for backwards-compatibility with the
    163    // old layout only.
    164    if constexpr (!IsAligned(offset, sizeof(CType))) {
    165      return GetPacked<OperandType>(pc, offset);
    166    } else {
    167      return GetAligned<OperandType>(pc, offset);
    168    }
    169  }
    170 
         // Overload for kBitTable operands: returns a pointer to the operand's first
         // byte inside the bytecode stream instead of a value; nothing is copied.
    171  template <Operand op>
    172    requires(Type(op) == RegExpBytecodeOperandType::kBitTable)
    173  static auto Get(const uint8_t* pc) {
    174    DCHECK_EQ(*pc, RegExpBytecodes::ToByte(bc));
    175    constexpr int offset = Offset(op);
    176    return pc + offset;
    177  }
    178 };
    179 
    180 }  // namespace detail
    181 
    // Expands to `x` followed by the remaining arguments. The separating comma is
    // emitted through __VA_OPT__, so it only appears when there are remaining
    // arguments; this lets a bytecode without operand types expand cleanly.
    // NOTE(review): PACK_OPTIONAL is not #undef'd in this file after its use in
    // DECLARE_OPERANDS - confirm whether other files rely on it.
    182 #define PACK_OPTIONAL(x, ...) x __VA_OPT__(, ) __VA_ARGS__
    183 
        // For each REGEXP_BYTECODE_LIST entry, defines the final class
        // RegExpBytecodeOperands<RegExpBytecode::k<CamelName>>. It derives from
        // detail::RegExpBytecodeOperandsBase instantiated with the bytecode and its
        // operand types, and from AllStatic. `using enum Operand` makes the operand
        // names usable directly as members of the class.
    184 #define DECLARE_OPERANDS(CamelName, SnakeName, OpNames, OpTypes)   \
    185  template <>                                                      \
    186  class RegExpBytecodeOperands<RegExpBytecode::k##CamelName> final \
    187      : public detail::RegExpBytecodeOperandsBase<PACK_OPTIONAL(   \
    188            RegExpBytecode::k##CamelName, UNPAREN(OpTypes))>,      \
    189        public AllStatic {                                         \
    190   public:                                                         \
    191    using enum Operand;                                            \
    192  };
    193 
    194 REGEXP_BYTECODE_LIST(DECLARE_OPERANDS)
    195 #undef DECLARE_OPERANDS
    196 
    197 namespace detail {
    198 
    // Table of bytecode names: one string per REGEXP_BYTECODE_LIST entry, in list
    // order, holding the stringized CamelName. RegExpBytecodes::Name() indexes
    // this table with the bytecode's byte value.
    199 #define DECLARE_BYTECODE_NAMES(CamelName, ...) #CamelName,
    200 static constexpr const char* kBytecodeNames[] = {
    201    REGEXP_BYTECODE_LIST(DECLARE_BYTECODE_NAMES)};
    202 #undef DECLARE_BYTECODE_NAMES
    203 
    // Table of bytecode sizes: one entry per REGEXP_BYTECODE_LIST entry, in list
    // order, holding RegExpBytecodeOperands<...>::kTotalSize (the size in bytes of
    // the bytecode including its operands). RegExpBytecodes::Size() indexes this
    // table with the bytecode's byte value.
    204 #define DECLARE_BYTECODE_SIZES(CamelName, ...) \
    205  RegExpBytecodeOperands<RegExpBytecode::k##CamelName>::kTotalSize,
    206 static constexpr uint8_t kBytecodeSizes[] = {
    207    REGEXP_BYTECODE_LIST(DECLARE_BYTECODE_SIZES)};
    208 #undef DECLARE_BYTECODE_SIZES
    209 
    210 }  // namespace detail
    211 
    212 // static
        // Returns the name of `bytecode` by converting it to its byte value and
        // forwarding to the uint8_t overload.
    213 constexpr const char* RegExpBytecodes::Name(RegExpBytecode bytecode) {
    214  return Name(ToByte(bytecode));
    215 }
    216 
    217 // static
        // Returns the entry of detail::kBytecodeNames for the bytecode with byte
        // value `bytecode`. The value must be smaller than kCount; this is only
        // checked by the DCHECK.
    218 constexpr const char* RegExpBytecodes::Name(uint8_t bytecode) {
    219  DCHECK_LT(bytecode, kCount);
    220  return detail::kBytecodeNames[bytecode];
    221 }
    222 
    223 // static
        // Returns the size of `bytecode` by converting it to its byte value and
        // forwarding to the uint8_t overload.
    224 constexpr uint8_t RegExpBytecodes::Size(RegExpBytecode bytecode) {
    225  return Size(ToByte(bytecode));
    226 }
    227 
    228 // static
        // Returns the entry of detail::kBytecodeSizes for the bytecode with byte
        // value `bytecode`, i.e. its size in bytes including operands. The value
        // must be smaller than kCount; this is only checked by the DCHECK.
    229 constexpr uint8_t RegExpBytecodes::Size(uint8_t bytecode) {
    230  DCHECK_LT(bytecode, kCount);
    231  return detail::kBytecodeSizes[bytecode];
    232 }
    233 
    234 // Checks for backwards compatibility.
    235 // TODO(pthier): Remove once we removed the old bytecode format.
        // The number of bytecodes must match the count of the old format.
    236 static_assert(kRegExpBytecodeCount == RegExpBytecodes::kCount);
    237 
        // Every bytecode's byte value must equal the corresponding BC_<SnakeName>
        // constant.
    238 #define CHECK_BYTECODE_VALUE(CamelName, SnakeName, ...)                  \
    239  static_assert(RegExpBytecodes::ToByte(RegExpBytecode::k##CamelName) == \
    240                BC_##SnakeName);
    241 REGEXP_BYTECODE_LIST(CHECK_BYTECODE_VALUE)
    242 #undef CHECK_BYTECODE_VALUE
    243 
        // Every bytecode's computed size must equal the length reported by
        // RegExpBytecodeLength for the corresponding BC_<SnakeName> constant.
    244 #define CHECK_LENGTH(CamelName, SnakeName, ...)                        \
    245  static_assert(RegExpBytecodes::Size(RegExpBytecode::k##CamelName) == \
    246                RegExpBytecodeLength(BC_##SnakeName));
    247 REGEXP_BYTECODE_LIST(CHECK_LENGTH)
    248 #undef CHECK_LENGTH
    249 
    250 }  // namespace internal
    251 }  // namespace v8
    252 
    253 #endif  // V8_REGEXP_REGEXP_BYTECODES_INL_H_