tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

constexpr_parser.h (13587B)


      1 // Copyright 2022 The Abseil Authors
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     https://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_
     16 #define ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_
     17 
     18 #include <cassert>
     19 #include <cstdint>
     20 #include <cstdio>
     21 #include <limits>
     22 
     23 #include "absl/base/config.h"
     24 #include "absl/base/const_init.h"
     25 #include "absl/base/optimization.h"
     26 #include "absl/strings/internal/str_format/extension.h"
     27 
     28 namespace absl {
     29 ABSL_NAMESPACE_BEGIN
     30 namespace str_format_internal {
     31 
     32 // The analyzed properties of a single specified conversion.
     33 struct UnboundConversion {
     34  // This is a user defined default constructor on purpose to skip the
     35  // initialization of parts of the object that are not necessary.
     36  UnboundConversion() {}  // NOLINT
     37 
     38  // This constructor is provided for the static checker. We don't want to do
     39  // the unnecessary initialization in the normal case.
     40  explicit constexpr UnboundConversion(absl::ConstInitType)
     41      : arg_position{}, width{}, precision{} {}
     42 
     43  class InputValue {
     44   public:
     45    constexpr void set_value(int value) {
     46      assert(value >= 0);
     47      value_ = value;
     48    }
     49    constexpr int value() const { return value_; }
     50 
     51    // Marks the value as "from arg". aka the '*' format.
     52    // Requires `value >= 1`.
     53    // When set, is_from_arg() return true and get_from_arg() returns the
     54    // original value.
     55    // `value()`'s return value is unspecified in this state.
     56    constexpr void set_from_arg(int value) {
     57      assert(value > 0);
     58      value_ = -value - 1;
     59    }
     60    constexpr bool is_from_arg() const { return value_ < -1; }
     61    constexpr int get_from_arg() const {
     62      assert(is_from_arg());
     63      return -value_ - 1;
     64    }
     65 
     66   private:
     67    int value_ = -1;
     68  };
     69 
     70  // No need to initialize. It will always be set in the parser.
     71  int arg_position;
     72 
     73  InputValue width;
     74  InputValue precision;
     75 
     76  Flags flags = Flags::kBasic;
     77  LengthMod length_mod = LengthMod::none;
     78  FormatConversionChar conv = FormatConversionCharInternal::kNone;
     79 };
     80 
     81 // Helper tag class for the table below.
     82 // It allows fast `char -> ConversionChar/LengthMod/Flags` checking and
     83 // conversions.
     84 class ConvTag {
     85 public:
     86  constexpr ConvTag(FormatConversionChar conversion_char)  // NOLINT
     87      : tag_(static_cast<uint8_t>(conversion_char)) {}
     88  constexpr ConvTag(LengthMod length_mod)  // NOLINT
     89      : tag_(0x80 | static_cast<uint8_t>(length_mod)) {}
     90  constexpr ConvTag(Flags flags)  // NOLINT
     91      : tag_(0xc0 | static_cast<uint8_t>(flags)) {}
     92  constexpr ConvTag() : tag_(0xFF) {}
     93 
     94  constexpr bool is_conv() const { return (tag_ & 0x80) == 0; }
     95  constexpr bool is_length() const { return (tag_ & 0xC0) == 0x80; }
     96  constexpr bool is_flags() const { return (tag_ & 0xE0) == 0xC0; }
     97 
     98  constexpr FormatConversionChar as_conv() const {
     99    assert(is_conv());
    100    assert(!is_length());
    101    assert(!is_flags());
    102    return static_cast<FormatConversionChar>(tag_);
    103  }
    104  constexpr LengthMod as_length() const {
    105    assert(!is_conv());
    106    assert(is_length());
    107    assert(!is_flags());
    108    return static_cast<LengthMod>(tag_ & 0x3F);
    109  }
    110  constexpr Flags as_flags() const {
    111    assert(!is_conv());
    112    assert(!is_length());
    113    assert(is_flags());
    114    return static_cast<Flags>(tag_ & 0x1F);
    115  }
    116 
    117 private:
    118  uint8_t tag_;
    119 };
    120 
// Holds the lookup table that maps every possible `unsigned char` value to its
// ConvTag. Entries written as `{}` are default-constructed tags, i.e. the
// character is neither a conversion char, a length modifier nor a flag.
struct ConvTagHolder {
  using CC = FormatConversionCharInternal;
  using LM = LengthMod;

  // Abbreviations to fit in the table below.
  static constexpr auto kFSign = Flags::kSignCol;
  static constexpr auto kFAlt = Flags::kAlt;
  static constexpr auto kFPos = Flags::kShowPos;
  static constexpr auto kFLeft = Flags::kLeft;
  static constexpr auto kFZero = Flags::kZero;

  // Indexed by character code; eight entries per row. The trailing comment on
  // each row lists the code range or the characters that the row covers.
  static constexpr ConvTag value[256] = {
      {},     {},    {},    {},    {},    {},     {},    {},     // 00-07
      {},     {},    {},    {},    {},    {},     {},    {},     // 08-0f
      {},     {},    {},    {},    {},    {},     {},    {},     // 10-17
      {},     {},    {},    {},    {},    {},     {},    {},     // 18-1f
      kFSign, {},    {},    kFAlt, {},    {},     {},    {},     //  !"#$%&'
      {},     {},    {},    kFPos, {},    kFLeft, {},    {},     // ()*+,-./
      kFZero, {},    {},    {},    {},    {},     {},    {},     // 01234567
      {},     {},    {},    {},    {},    {},     {},    {},     // 89:;<=>?
      {},     CC::A, {},    {},    {},    CC::E,  CC::F, CC::G,  // @ABCDEFG
      {},     {},    {},    {},    LM::L, {},     {},    {},     // HIJKLMNO
      {},     {},    {},    {},    {},    {},     {},    {},     // PQRSTUVW
      CC::X,  {},    {},    {},    {},    {},     {},    {},     // XYZ[\]^_
      {},     CC::a, {},    CC::c, CC::d, CC::e,  CC::f, CC::g,  // `abcdefg
      LM::h,  CC::i, LM::j, {},    LM::l, {},     CC::n, CC::o,  // hijklmno
      CC::p,  LM::q, {},    CC::s, LM::t, CC::u,  CC::v, {},     // pqrstuvw
      CC::x,  {},    LM::z, {},    {},    {},     {},    {},     // xyz{|}~ DEL
      {},     {},    {},    {},    {},    {},     {},    {},     // 80-87
      {},     {},    {},    {},    {},    {},     {},    {},     // 88-8f
      {},     {},    {},    {},    {},    {},     {},    {},     // 90-97
      {},     {},    {},    {},    {},    {},     {},    {},     // 98-9f
      {},     {},    {},    {},    {},    {},     {},    {},     // a0-a7
      {},     {},    {},    {},    {},    {},     {},    {},     // a8-af
      {},     {},    {},    {},    {},    {},     {},    {},     // b0-b7
      {},     {},    {},    {},    {},    {},     {},    {},     // b8-bf
      {},     {},    {},    {},    {},    {},     {},    {},     // c0-c7
      {},     {},    {},    {},    {},    {},     {},    {},     // c8-cf
      {},     {},    {},    {},    {},    {},     {},    {},     // d0-d7
      {},     {},    {},    {},    {},    {},     {},    {},     // d8-df
      {},     {},    {},    {},    {},    {},     {},    {},     // e0-e7
      {},     {},    {},    {},    {},    {},     {},    {},     // e8-ef
      {},     {},    {},    {},    {},    {},     {},    {},     // f0-f7
      {},     {},    {},    {},    {},    {},     {},    {},     // f8-ff
  };
};
    167 
    168 // Keep a single table for all the conversion chars and length modifiers.
    169 constexpr ConvTag GetTagForChar(char c) {
    170  return ConvTagHolder::value[static_cast<unsigned char>(c)];
    171 }
    172 
    173 constexpr bool CheckFastPathSetting(const UnboundConversion& conv) {
    174  bool width_precision_needed =
    175      conv.width.value() >= 0 || conv.precision.value() >= 0;
    176  if (width_precision_needed && conv.flags == Flags::kBasic) {
    177 #if defined(__clang__)
    178    // Some compilers complain about this in constexpr even when not executed,
    179    // so only enable the error dump in clang.
    180    fprintf(stderr,
    181            "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d "
    182            "width=%d precision=%d\n",
    183            conv.flags == Flags::kBasic ? 1 : 0,
    184            FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0,
    185            FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0,
    186            FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0,
    187            FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0,
    188            FlagsContains(conv.flags, Flags::kZero) ? 1 : 0, conv.width.value(),
    189            conv.precision.value());
    190 #endif  // defined(__clang__)
    191    return false;
    192  }
    193  return true;
    194 }
    195 
    196 constexpr int ParseDigits(char& c, const char*& pos, const char* const end) {
    197  int digits = c - '0';
    198  // We do not want to overflow `digits` so we consume at most digits10
    199  // digits. If there are more digits the parsing will fail later on when the
    200  // digit doesn't match the expected characters.
    201  int num_digits = std::numeric_limits<int>::digits10;
    202  for (;;) {
    203    if (ABSL_PREDICT_FALSE(pos == end)) break;
    204    c = *pos++;
    205    if ('0' > c || c > '9') break;
    206    --num_digits;
    207    if (ABSL_PREDICT_FALSE(!num_digits)) break;
    208    digits = 10 * digits + c - '0';
    209  }
    210  return digits;
    211 }
    212 
// Parses one conversion specification from [pos, end), where `pos` points at
// the character right after the '%', and stores the result in `*conv`.
//
// Template parameter:
//   is_positional: when true, the spec must start with an explicit "N$"
//     argument index and any '*' width/precision must be followed by "N$".
//     When false, arguments are numbered implicitly through `*next_arg`.
//
// Parameters:
//   pos, end: half-open range of characters to parse.
//   conv: receives arg_position, flags, width, precision, length_mod and conv.
//     Its flags must be Flags::kBasic on entry (asserted).
//   next_arg: in non-positional mode it is pre-incremented for every argument
//     consumed, including '*' width/precision. It is set to -1 when a
//     non-positional parse discovers an "N$" prefix and `*next_arg` is still 0.
//
// Returns a pointer to the first character after the conversion spec, or
// nullptr when the spec is malformed or the input ends too early. `*conv` may
// have been partially updated when nullptr is returned.
template <bool is_positional>
constexpr const char* ConsumeConversion(const char* pos, const char* const end,
                                        UnboundConversion* conv,
                                        int* next_arg) {
  // Remembered so that parsing can restart from here in positional mode.
  const char* const original_pos = pos;
  char c = 0;
  // Read the next char into `c` and update `pos`. Makes the enclosing function
  // return nullptr if there are no more chars to read.
#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR()          \
  do {                                                  \
    if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
    c = *pos++;                                         \
  } while (0)

  if (is_positional) {
    // Positional specs start with "N$", where N begins with a digit 1-9.
    ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
    if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
    conv->arg_position = ParseDigits(c, pos, end);
    assert(conv->arg_position > 0);
    if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
  }

  ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();

  // We should start with the basic flag on.
  assert(conv->flags == Flags::kBasic);

  // Any non alpha character makes this conversion not basic.
  // This includes flags (-+ #0), width (1-9, *) or precision (.).
  // All conversion characters and length modifiers are alpha characters.
  if (c < 'A') {
    // Consume a run of flag characters. Every flag character in the table
    // (' ', '#', '+', '-', '0') compares <= '0'.
    while (c <= '0') {
      auto tag = GetTagForChar(c);
      if (tag.is_flags()) {
        conv->flags = conv->flags | tag.as_flags();
        ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
      } else {
        break;
      }
    }

    if (c <= '9') {
      if (c >= '0') {
        // A digit here is either a literal width or, in non-positional mode,
        // the N of an "N$" positional prefix. A '0' cannot reach this point
        // because the loop above consumes it as a flag.
        int maybe_width = ParseDigits(c, pos, end);
        if (!is_positional && c == '$') {
          // Switching to positional mode is only accepted while `*next_arg`
          // is still 0.
          if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
          // Positional conversion.
          *next_arg = -1;
          // Re-parse the whole spec from the start in positional mode.
          return ConsumeConversion<true>(original_pos, end, conv, next_arg);
        }
        conv->flags = conv->flags | Flags::kNonBasic;
        conv->width.set_value(maybe_width);
      } else if (c == '*') {
        // Width is supplied by an argument.
        conv->flags = conv->flags | Flags::kNonBasic;
        ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
        if (is_positional) {
          // Positional form: "*N$".
          if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
          conv->width.set_from_arg(ParseDigits(c, pos, end));
          if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
          ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
        } else {
          // Implicit form: the width takes the next argument.
          conv->width.set_from_arg(++*next_arg);
        }
      }
    }

    if (c == '.') {
      conv->flags = conv->flags | Flags::kNonBasic;
      ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
      if ('0' <= c && c <= '9') {
        conv->precision.set_value(ParseDigits(c, pos, end));
      } else if (c == '*') {
        // Precision is supplied by an argument; same two forms as for width.
        ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
        if (is_positional) {
          if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
          conv->precision.set_from_arg(ParseDigits(c, pos, end));
          if (c != '$') return nullptr;
          ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
        } else {
          conv->precision.set_from_arg(++*next_arg);
        }
      } else {
        // A '.' that is followed by neither digits nor '*' means precision 0.
        conv->precision.set_value(0);
      }
    }
  }

  auto tag = GetTagForChar(c);

  // 'v' is rejected as soon as any flag, width or precision was given.
  if (ABSL_PREDICT_FALSE(c == 'v' && conv->flags != Flags::kBasic)) {
    return nullptr;
  }

  if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
    if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;

    // It is a length modifier.
    LengthMod length_mod = tag.as_length();
    ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
    // "hh" and "ll" are the only two-character length modifiers.
    if (c == 'h' && length_mod == LengthMod::h) {
      conv->length_mod = LengthMod::hh;
      ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
    } else if (c == 'l' && length_mod == LengthMod::l) {
      conv->length_mod = LengthMod::ll;
      ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
    } else {
      conv->length_mod = length_mod;
    }
    tag = GetTagForChar(c);

    // 'v' cannot be combined with a length modifier, and whatever follows a
    // length modifier must be a conversion character.
    if (ABSL_PREDICT_FALSE(c == 'v')) return nullptr;
    if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;

    // `wchar_t` args are marked non-basic so `Bind()` will copy the length mod.
    if (conv->length_mod == LengthMod::l && c == 'c') {
      conv->flags = conv->flags | Flags::kNonBasic;
    }
  }
#undef ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR

  assert(CheckFastPathSetting(*conv));
  // Keeps a reference to the function when the assert above compiles away;
  // presumably this silences unused-function warnings in NDEBUG builds.
  (void)(&CheckFastPathSetting);

  conv->conv = tag.as_conv();
  // In non-positional mode the conversion itself takes the next argument.
  if (!is_positional) conv->arg_position = ++*next_arg;
  return pos;
}
    340 
    341 // Consume conversion spec prefix (not including '%') of [p, end) if valid.
    342 // Examples of valid specs would be e.g.: "s", "d", "-12.6f".
    343 // If valid, it returns the first character following the conversion spec,
    344 // and the spec part is broken down and returned in 'conv'.
    345 // If invalid, returns nullptr.
    346 constexpr const char* ConsumeUnboundConversion(const char* p, const char* end,
    347                                               UnboundConversion* conv,
    348                                               int* next_arg) {
    349  if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg);
    350  return ConsumeConversion<false>(p, end, conv, next_arg);
    351 }
    352 
    353 }  // namespace str_format_internal
    354 ABSL_NAMESPACE_END
    355 }  // namespace absl
    356 
    357 #endif  // ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_