regexp-bytecodes-inl.h (9585B)
1 // Copyright 2025 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_REGEXP_REGEXP_BYTECODES_INL_H_ 6 #define V8_REGEXP_REGEXP_BYTECODES_INL_H_ 7 8 #include "irregexp/imported/regexp-bytecodes.h" 9 // Include the non-inl header before the rest of the headers. 10 11 #include <type_traits> 12 13 namespace v8 { 14 namespace internal { 15 16 template <RegExpBytecodeOperandType> 17 struct RegExpOperandTypeTraits; 18 19 #define DECLARE_BASIC_OPERAND_TYPE_TRAITS(Name, CType) \ 20 template <> \ 21 struct RegExpOperandTypeTraits<RegExpBytecodeOperandType::k##Name> { \ 22 static_assert(!std::is_pointer_v<CType>); \ 23 static constexpr uint8_t kSize = sizeof(CType); \ 24 using kCType = CType; \ 25 static constexpr bool kIsBasic = true; \ 26 }; 27 BASIC_BYTECODE_OPERAND_TYPE_LIST(DECLARE_BASIC_OPERAND_TYPE_TRAITS) 28 #undef DECLARE_OPERAND_TYPE_TRAITS 29 30 #define DECLARE_SPECIAL_OPERAND_TYPE_TRAITS(Name, Size) \ 31 template <> \ 32 struct RegExpOperandTypeTraits<RegExpBytecodeOperandType::k##Name> { \ 33 static constexpr uint8_t kSize = Size; \ 34 static constexpr bool kIsBasic = false; \ 35 }; 36 SPECIAL_BYTECODE_OPERAND_TYPE_LIST(DECLARE_SPECIAL_OPERAND_TYPE_TRAITS) 37 #undef DECLARE_OPERAND_TYPE_TRAITS 38 39 namespace detail { 40 41 // Bytecode is 4-byte aligned. 42 // We can pack operands if multiple operands fit into 4 bytes. 43 static constexpr int kBytecodeAlignment = 4; 44 45 // Calculates packed offsets for each Bytecode operand. 46 // The first operand can be packed together with the bytecode at an unaligned 47 // offset 1. All other operands are aligned to their own size if 48 // they are "basic" types. 49 template <RegExpBytecodeOperandType... operand_types> 50 consteval auto CalculatePackedOffsets() { 51 constexpr int N = sizeof...(operand_types); 52 constexpr std::array<uint8_t, N> kOperandSizes = { 53 RegExpOperandTypeTraits<operand_types>::kSize...}; 54 constexpr std::array<bool, N> kIsBasic = { 55 RegExpOperandTypeTraits<operand_types>::kIsBasic...}; 56 57 std::array<int, N> offsets{}; 58 int first_offset = sizeof(RegExpBytecode); 59 int offset = first_offset; 60 61 for (size_t i = 0; i < N; ++i) { 62 uint8_t operand_size = kOperandSizes[i]; 63 64 // An operand is only allowed to be unaligned, if it's packed with the 65 // bytecode. All subsequent basic operands must be aligned to their own 66 // size. 67 if (offset > first_offset && kIsBasic[i]) { 68 offset = RoundUp(offset, operand_size); 69 } 70 71 // If the operand doesn't fit into the current 4-byte block, start a new 72 // 4-byte block. 73 if ((offset % kBytecodeAlignment) + operand_size > kBytecodeAlignment) { 74 offset = RoundUp<kBytecodeAlignment>(offset); 75 } 76 77 offsets[i] = offset; 78 offset += operand_size; 79 } 80 81 return offsets; 82 } 83 84 template <RegExpBytecodeOperandType... ops> 85 struct RegExpBytecodeOperandsTraits { 86 static constexpr int kOperandCount = sizeof...(ops); 87 static constexpr std::array<RegExpBytecodeOperandType, kOperandCount> 88 kOperandTypes = {ops...}; 89 static constexpr std::array<uint8_t, kOperandCount> kOperandSizes = { 90 RegExpOperandTypeTraits<ops>::kSize...}; 91 static constexpr std::array<int, kOperandCount> kOperandOffsets = 92 CalculatePackedOffsets<ops...>(); 93 static constexpr int kSize = RoundUp<kBytecodeAlignment>( 94 kOperandCount == 0 ? sizeof(RegExpBytecode) 95 : kOperandOffsets.back() + kOperandSizes.back()); 96 }; 97 98 template <RegExpBytecode bc> 99 struct RegExpBytecodeOperandNames; 100 101 #define DECLARE_OPERAND_NAMES(CamelName, SnakeName, OpNames, OpTypes) \ 102 template <> \ 103 struct RegExpBytecodeOperandNames<RegExpBytecode::k##CamelName> { \ 104 enum class Operand { UNPAREN(OpNames) }; \ 105 using enum Operand; \ 106 }; 107 REGEXP_BYTECODE_LIST(DECLARE_OPERAND_NAMES) 108 #undef DECLARE_OPERAND_NAMES 109 110 template <RegExpBytecode bc, RegExpBytecodeOperandType... OpTypes> 111 class RegExpBytecodeOperandsBase { 112 public: 113 using Operand = RegExpBytecodeOperandNames<bc>::Operand; 114 using Traits = RegExpBytecodeOperandsTraits<OpTypes...>; 115 static constexpr int kCount = Traits::kOperandCount; 116 static constexpr int kTotalSize = Traits::kSize; 117 static consteval int Index(Operand op) { return static_cast<uint8_t>(op); } 118 static consteval int Size(Operand op) { 119 return Traits::kOperandSizes[Index(op)]; 120 } 121 static consteval int Offset(Operand op) { 122 return Traits::kOperandOffsets[Index(op)]; 123 } 124 static consteval RegExpBytecodeOperandType Type(Operand op) { 125 return Traits::kOperandTypes[Index(op)]; 126 } 127 128 private: 129 template <RegExpBytecodeOperandType OperandType> 130 requires(RegExpOperandTypeTraits<OperandType>::kIsBasic) 131 static auto GetAligned(const uint8_t* pc, int offset) { 132 DCHECK_EQ(*pc, RegExpBytecodes::ToByte(bc)); 133 using CType = RegExpOperandTypeTraits<OperandType>::kCType; 134 DCHECK(IsAligned(offset, sizeof(CType))); 135 return *reinterpret_cast<const CType*>(pc + offset); 136 } 137 138 // TODO(pthier): We can remove unaligned packing once we have fully switched 139 // to the new bytecode layout. This is for backwards-compatibility with the 140 // old layout only. 141 template <RegExpBytecodeOperandType OperandType> 142 requires(RegExpOperandTypeTraits<OperandType>::kIsBasic) 143 static auto GetPacked(const uint8_t* pc, int offset) { 144 DCHECK_EQ(*pc, RegExpBytecodes::ToByte(bc)); 145 // Only unaligned packing of 2-byte values with the bytecode is supported. 146 DCHECK_EQ(offset, 1); 147 static_assert(RegExpOperandTypeTraits<OperandType>::kSize == 2); 148 using CType = RegExpOperandTypeTraits<OperandType>::kCType; 149 DCHECK(!IsAligned(offset, sizeof(CType))); 150 int32_t packed_value = *reinterpret_cast<const int32_t*>(pc); 151 return static_cast<CType>(packed_value >> BYTECODE_SHIFT); 152 } 153 154 public: 155 template <Operand op> 156 requires(RegExpOperandTypeTraits<Type(op)>::kIsBasic) 157 static auto Get(const uint8_t* pc) { 158 constexpr RegExpBytecodeOperandType OperandType = Type(op); 159 constexpr int offset = Offset(op); 160 using CType = RegExpOperandTypeTraits<OperandType>::kCType; 161 // TODO(pthier): We can remove unaligned packing once we have fully switched 162 // to the new bytecode layout. This is for backwards-compatibility with the 163 // old layout only. 164 if constexpr (!IsAligned(offset, sizeof(CType))) { 165 return GetPacked<OperandType>(pc, offset); 166 } else { 167 return GetAligned<OperandType>(pc, offset); 168 } 169 } 170 171 template <Operand op> 172 requires(Type(op) == RegExpBytecodeOperandType::kBitTable) 173 static auto Get(const uint8_t* pc) { 174 DCHECK_EQ(*pc, RegExpBytecodes::ToByte(bc)); 175 constexpr int offset = Offset(op); 176 return pc + offset; 177 } 178 }; 179 180 } // namespace detail 181 182 #define PACK_OPTIONAL(x, ...) x __VA_OPT__(, ) __VA_ARGS__ 183 184 #define DECLARE_OPERANDS(CamelName, SnakeName, OpNames, OpTypes) \ 185 template <> \ 186 class RegExpBytecodeOperands<RegExpBytecode::k##CamelName> final \ 187 : public detail::RegExpBytecodeOperandsBase<PACK_OPTIONAL( \ 188 RegExpBytecode::k##CamelName, UNPAREN(OpTypes))>, \ 189 public AllStatic { \ 190 public: \ 191 using enum Operand; \ 192 }; 193 194 REGEXP_BYTECODE_LIST(DECLARE_OPERANDS) 195 #undef DECLARE_OPERANDS 196 197 namespace detail { 198 199 #define DECLARE_BYTECODE_NAMES(CamelName, ...) #CamelName, 200 static constexpr const char* kBytecodeNames[] = { 201 REGEXP_BYTECODE_LIST(DECLARE_BYTECODE_NAMES)}; 202 #undef DECLARE_BYTECODE_NAMES 203 204 #define DECLARE_BYTECODE_SIZES(CamelName, ...) \ 205 RegExpBytecodeOperands<RegExpBytecode::k##CamelName>::kTotalSize, 206 static constexpr uint8_t kBytecodeSizes[] = { 207 REGEXP_BYTECODE_LIST(DECLARE_BYTECODE_SIZES)}; 208 #undef DECLARE_BYTECODE_SIZES 209 210 } // namespace detail 211 212 // static 213 constexpr const char* RegExpBytecodes::Name(RegExpBytecode bytecode) { 214 return Name(ToByte(bytecode)); 215 } 216 217 // static 218 constexpr const char* RegExpBytecodes::Name(uint8_t bytecode) { 219 DCHECK_LT(bytecode, kCount); 220 return detail::kBytecodeNames[bytecode]; 221 } 222 223 // static 224 constexpr uint8_t RegExpBytecodes::Size(RegExpBytecode bytecode) { 225 return Size(ToByte(bytecode)); 226 } 227 228 // static 229 constexpr uint8_t RegExpBytecodes::Size(uint8_t bytecode) { 230 DCHECK_LT(bytecode, kCount); 231 return detail::kBytecodeSizes[bytecode]; 232 } 233 234 // Checks for backwards compatibility. 235 // TODO(pthier): Remove once we removed the old bytecode format. 236 static_assert(kRegExpBytecodeCount == RegExpBytecodes::kCount); 237 238 #define CHECK_BYTECODE_VALUE(CamelName, SnakeName, ...) \ 239 static_assert(RegExpBytecodes::ToByte(RegExpBytecode::k##CamelName) == \ 240 BC_##SnakeName); 241 REGEXP_BYTECODE_LIST(CHECK_BYTECODE_VALUE) 242 #undef CHECK_BYTECODE_VALUE 243 244 #define CHECK_LENGTH(CamelName, SnakeName, ...) \ 245 static_assert(RegExpBytecodes::Size(RegExpBytecode::k##CamelName) == \ 246 RegExpBytecodeLength(BC_##SnakeName)); 247 REGEXP_BYTECODE_LIST(CHECK_LENGTH) 248 #undef CHECK_LENGTH 249 250 } // namespace internal 251 } // namespace v8 252 253 #endif // V8_REGEXP_REGEXP_BYTECODES_INL_H_