// constexpr_parser.h (13587B)
1 // Copyright 2022 The Abseil Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_ 16 #define ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_ 17 18 #include <cassert> 19 #include <cstdint> 20 #include <cstdio> 21 #include <limits> 22 23 #include "absl/base/config.h" 24 #include "absl/base/const_init.h" 25 #include "absl/base/optimization.h" 26 #include "absl/strings/internal/str_format/extension.h" 27 28 namespace absl { 29 ABSL_NAMESPACE_BEGIN 30 namespace str_format_internal { 31 32 // The analyzed properties of a single specified conversion. 33 struct UnboundConversion { 34 // This is a user defined default constructor on purpose to skip the 35 // initialization of parts of the object that are not necessary. 36 UnboundConversion() {} // NOLINT 37 38 // This constructor is provided for the static checker. We don't want to do 39 // the unnecessary initialization in the normal case. 40 explicit constexpr UnboundConversion(absl::ConstInitType) 41 : arg_position{}, width{}, precision{} {} 42 43 class InputValue { 44 public: 45 constexpr void set_value(int value) { 46 assert(value >= 0); 47 value_ = value; 48 } 49 constexpr int value() const { return value_; } 50 51 // Marks the value as "from arg". aka the '*' format. 52 // Requires `value >= 1`. 53 // When set, is_from_arg() return true and get_from_arg() returns the 54 // original value. 
55 // `value()`'s return value is unspecified in this state. 56 constexpr void set_from_arg(int value) { 57 assert(value > 0); 58 value_ = -value - 1; 59 } 60 constexpr bool is_from_arg() const { return value_ < -1; } 61 constexpr int get_from_arg() const { 62 assert(is_from_arg()); 63 return -value_ - 1; 64 } 65 66 private: 67 int value_ = -1; 68 }; 69 70 // No need to initialize. It will always be set in the parser. 71 int arg_position; 72 73 InputValue width; 74 InputValue precision; 75 76 Flags flags = Flags::kBasic; 77 LengthMod length_mod = LengthMod::none; 78 FormatConversionChar conv = FormatConversionCharInternal::kNone; 79 }; 80 81 // Helper tag class for the table below. 82 // It allows fast `char -> ConversionChar/LengthMod/Flags` checking and 83 // conversions. 84 class ConvTag { 85 public: 86 constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT 87 : tag_(static_cast<uint8_t>(conversion_char)) {} 88 constexpr ConvTag(LengthMod length_mod) // NOLINT 89 : tag_(0x80 | static_cast<uint8_t>(length_mod)) {} 90 constexpr ConvTag(Flags flags) // NOLINT 91 : tag_(0xc0 | static_cast<uint8_t>(flags)) {} 92 constexpr ConvTag() : tag_(0xFF) {} 93 94 constexpr bool is_conv() const { return (tag_ & 0x80) == 0; } 95 constexpr bool is_length() const { return (tag_ & 0xC0) == 0x80; } 96 constexpr bool is_flags() const { return (tag_ & 0xE0) == 0xC0; } 97 98 constexpr FormatConversionChar as_conv() const { 99 assert(is_conv()); 100 assert(!is_length()); 101 assert(!is_flags()); 102 return static_cast<FormatConversionChar>(tag_); 103 } 104 constexpr LengthMod as_length() const { 105 assert(!is_conv()); 106 assert(is_length()); 107 assert(!is_flags()); 108 return static_cast<LengthMod>(tag_ & 0x3F); 109 } 110 constexpr Flags as_flags() const { 111 assert(!is_conv()); 112 assert(!is_length()); 113 assert(is_flags()); 114 return static_cast<Flags>(tag_ & 0x1F); 115 } 116 117 private: 118 uint8_t tag_; 119 }; 120 121 struct ConvTagHolder { 122 using CC = 
FormatConversionCharInternal; 123 using LM = LengthMod; 124 125 // Abbreviations to fit in the table below. 126 static constexpr auto kFSign = Flags::kSignCol; 127 static constexpr auto kFAlt = Flags::kAlt; 128 static constexpr auto kFPos = Flags::kShowPos; 129 static constexpr auto kFLeft = Flags::kLeft; 130 static constexpr auto kFZero = Flags::kZero; 131 132 static constexpr ConvTag value[256] = { 133 {}, {}, {}, {}, {}, {}, {}, {}, // 00-07 134 {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f 135 {}, {}, {}, {}, {}, {}, {}, {}, // 10-17 136 {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f 137 kFSign, {}, {}, kFAlt, {}, {}, {}, {}, // !"#$%&' 138 {}, {}, {}, kFPos, {}, kFLeft, {}, {}, // ()*+,-./ 139 kFZero, {}, {}, {}, {}, {}, {}, {}, // 01234567 140 {}, {}, {}, {}, {}, {}, {}, {}, // 89:;<=>? 141 {}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG 142 {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO 143 {}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW 144 CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_ 145 {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg 146 LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno 147 CC::p, LM::q, {}, CC::s, LM::t, CC::u, CC::v, {}, // pqrstuvw 148 CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}! 
149 {}, {}, {}, {}, {}, {}, {}, {}, // 80-87 150 {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f 151 {}, {}, {}, {}, {}, {}, {}, {}, // 90-97 152 {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f 153 {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7 154 {}, {}, {}, {}, {}, {}, {}, {}, // a8-af 155 {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7 156 {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf 157 {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7 158 {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf 159 {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7 160 {}, {}, {}, {}, {}, {}, {}, {}, // d8-df 161 {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7 162 {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef 163 {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7 164 {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff 165 }; 166 }; 167 168 // Keep a single table for all the conversion chars and length modifiers. 169 constexpr ConvTag GetTagForChar(char c) { 170 return ConvTagHolder::value[static_cast<unsigned char>(c)]; 171 } 172 173 constexpr bool CheckFastPathSetting(const UnboundConversion& conv) { 174 bool width_precision_needed = 175 conv.width.value() >= 0 || conv.precision.value() >= 0; 176 if (width_precision_needed && conv.flags == Flags::kBasic) { 177 #if defined(__clang__) 178 // Some compilers complain about this in constexpr even when not executed, 179 // so only enable the error dump in clang. 180 fprintf(stderr, 181 "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d " 182 "width=%d precision=%d\n", 183 conv.flags == Flags::kBasic ? 1 : 0, 184 FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0, 185 FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0, 186 FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0, 187 FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0, 188 FlagsContains(conv.flags, Flags::kZero) ? 
1 : 0, conv.width.value(), 189 conv.precision.value()); 190 #endif // defined(__clang__) 191 return false; 192 } 193 return true; 194 } 195 196 constexpr int ParseDigits(char& c, const char*& pos, const char* const end) { 197 int digits = c - '0'; 198 // We do not want to overflow `digits` so we consume at most digits10 199 // digits. If there are more digits the parsing will fail later on when the 200 // digit doesn't match the expected characters. 201 int num_digits = std::numeric_limits<int>::digits10; 202 for (;;) { 203 if (ABSL_PREDICT_FALSE(pos == end)) break; 204 c = *pos++; 205 if ('0' > c || c > '9') break; 206 --num_digits; 207 if (ABSL_PREDICT_FALSE(!num_digits)) break; 208 digits = 10 * digits + c - '0'; 209 } 210 return digits; 211 } 212 213 template <bool is_positional> 214 constexpr const char* ConsumeConversion(const char* pos, const char* const end, 215 UnboundConversion* conv, 216 int* next_arg) { 217 const char* const original_pos = pos; 218 char c = 0; 219 // Read the next char into `c` and update `pos`. Returns false if there are 220 // no more chars to read. 221 #define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \ 222 do { \ 223 if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \ 224 c = *pos++; \ 225 } while (0) 226 227 if (is_positional) { 228 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); 229 if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; 230 conv->arg_position = ParseDigits(c, pos, end); 231 assert(conv->arg_position > 0); 232 if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; 233 } 234 235 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); 236 237 // We should start with the basic flag on. 238 assert(conv->flags == Flags::kBasic); 239 240 // Any non alpha character makes this conversion not basic. 241 // This includes flags (-+ #0), width (1-9, *) or precision (.). 242 // All conversion characters and length modifiers are alpha characters. 
243 if (c < 'A') { 244 while (c <= '0') { 245 auto tag = GetTagForChar(c); 246 if (tag.is_flags()) { 247 conv->flags = conv->flags | tag.as_flags(); 248 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); 249 } else { 250 break; 251 } 252 } 253 254 if (c <= '9') { 255 if (c >= '0') { 256 int maybe_width = ParseDigits(c, pos, end); 257 if (!is_positional && c == '$') { 258 if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr; 259 // Positional conversion. 260 *next_arg = -1; 261 return ConsumeConversion<true>(original_pos, end, conv, next_arg); 262 } 263 conv->flags = conv->flags | Flags::kNonBasic; 264 conv->width.set_value(maybe_width); 265 } else if (c == '*') { 266 conv->flags = conv->flags | Flags::kNonBasic; 267 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); 268 if (is_positional) { 269 if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; 270 conv->width.set_from_arg(ParseDigits(c, pos, end)); 271 if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; 272 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); 273 } else { 274 conv->width.set_from_arg(++*next_arg); 275 } 276 } 277 } 278 279 if (c == '.') { 280 conv->flags = conv->flags | Flags::kNonBasic; 281 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); 282 if ('0' <= c && c <= '9') { 283 conv->precision.set_value(ParseDigits(c, pos, end)); 284 } else if (c == '*') { 285 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); 286 if (is_positional) { 287 if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; 288 conv->precision.set_from_arg(ParseDigits(c, pos, end)); 289 if (c != '$') return nullptr; 290 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); 291 } else { 292 conv->precision.set_from_arg(++*next_arg); 293 } 294 } else { 295 conv->precision.set_value(0); 296 } 297 } 298 } 299 300 auto tag = GetTagForChar(c); 301 302 if (ABSL_PREDICT_FALSE(c == 'v' && conv->flags != Flags::kBasic)) { 303 return nullptr; 304 } 305 306 if (ABSL_PREDICT_FALSE(!tag.is_conv())) { 307 if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr; 308 309 // It is a length modifier. 
310 LengthMod length_mod = tag.as_length(); 311 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); 312 if (c == 'h' && length_mod == LengthMod::h) { 313 conv->length_mod = LengthMod::hh; 314 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); 315 } else if (c == 'l' && length_mod == LengthMod::l) { 316 conv->length_mod = LengthMod::ll; 317 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); 318 } else { 319 conv->length_mod = length_mod; 320 } 321 tag = GetTagForChar(c); 322 323 if (ABSL_PREDICT_FALSE(c == 'v')) return nullptr; 324 if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr; 325 326 // `wchar_t` args are marked non-basic so `Bind()` will copy the length mod. 327 if (conv->length_mod == LengthMod::l && c == 'c') { 328 conv->flags = conv->flags | Flags::kNonBasic; 329 } 330 } 331 #undef ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR 332 333 assert(CheckFastPathSetting(*conv)); 334 (void)(&CheckFastPathSetting); 335 336 conv->conv = tag.as_conv(); 337 if (!is_positional) conv->arg_position = ++*next_arg; 338 return pos; 339 } 340 341 // Consume conversion spec prefix (not including '%') of [p, end) if valid. 342 // Examples of valid specs would be e.g.: "s", "d", "-12.6f". 343 // If valid, it returns the first character following the conversion spec, 344 // and the spec part is broken down and returned in 'conv'. 345 // If invalid, returns nullptr. 346 constexpr const char* ConsumeUnboundConversion(const char* p, const char* end, 347 UnboundConversion* conv, 348 int* next_arg) { 349 if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg); 350 return ConsumeConversion<false>(p, end, conv, next_arg); 351 } 352 353 } // namespace str_format_internal 354 ABSL_NAMESPACE_END 355 } // namespace absl 356 357 #endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_