parser.h (9951B)
1 // Copyright 2020 The Abseil Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ 16 #define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ 17 18 #include <stddef.h> 19 #include <stdlib.h> 20 21 #include <cassert> 22 #include <cstring> 23 #include <initializer_list> 24 #include <memory> 25 #include <string> 26 #include <utility> 27 #include <vector> 28 29 #include "absl/base/config.h" 30 #include "absl/base/optimization.h" 31 #include "absl/strings/internal/str_format/checker.h" 32 #include "absl/strings/internal/str_format/constexpr_parser.h" 33 #include "absl/strings/internal/str_format/extension.h" 34 #include "absl/strings/string_view.h" 35 36 namespace absl { 37 ABSL_NAMESPACE_BEGIN 38 namespace str_format_internal { 39 40 std::string LengthModToString(LengthMod v); 41 42 const char* ConsumeUnboundConversionNoInline(const char* p, const char* end, 43 UnboundConversion* conv, 44 int* next_arg); 45 46 // Parse the format string provided in 'src' and pass the identified items into 47 // 'consumer'. 48 // Text runs will be passed by calling 49 // Consumer::Append(string_view); 50 // ConversionItems will be passed by calling 51 // Consumer::ConvertOne(UnboundConversion, string_view); 52 // In the case of ConvertOne, the string_view that is passed is the 53 // portion of the format string corresponding to the conversion, not including 54 // the leading %. On success, it returns true. On failure, it stops and returns 55 // false. 56 template <typename Consumer> 57 bool ParseFormatString(string_view src, Consumer consumer) { 58 int next_arg = 0; 59 const char* p = src.data(); 60 const char* const end = p + src.size(); 61 while (p != end) { 62 const char* percent = 63 static_cast<const char*>(memchr(p, '%', static_cast<size_t>(end - p))); 64 if (!percent) { 65 // We found the last substring. 66 return consumer.Append(string_view(p, static_cast<size_t>(end - p))); 67 } 68 // We found a percent, so push the text run then process the percent. 69 if (ABSL_PREDICT_FALSE(!consumer.Append( 70 string_view(p, static_cast<size_t>(percent - p))))) { 71 return false; 72 } 73 if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false; 74 75 auto tag = GetTagForChar(percent[1]); 76 if (tag.is_conv()) { 77 if (ABSL_PREDICT_FALSE(next_arg < 0)) { 78 // This indicates an error in the format string. 79 // The only way to get `next_arg < 0` here is to have a positional 80 // argument first which sets next_arg to -1 and then a non-positional 81 // argument. 82 return false; 83 } 84 p = percent + 2; 85 86 // Keep this case separate from the one below. 87 // ConvertOne is more efficient when the compiler can see that the `basic` 88 // flag is set. 89 UnboundConversion conv; 90 conv.conv = tag.as_conv(); 91 conv.arg_position = ++next_arg; 92 if (ABSL_PREDICT_FALSE( 93 !consumer.ConvertOne(conv, string_view(percent + 1, 1)))) { 94 return false; 95 } 96 } else if (percent[1] != '%') { 97 UnboundConversion conv; 98 p = ConsumeUnboundConversionNoInline(percent + 1, end, &conv, &next_arg); 99 if (ABSL_PREDICT_FALSE(p == nullptr)) return false; 100 if (ABSL_PREDICT_FALSE(!consumer.ConvertOne( 101 conv, string_view(percent + 1, 102 static_cast<size_t>(p - (percent + 1)))))) { 103 return false; 104 } 105 } else { 106 if (ABSL_PREDICT_FALSE(!consumer.Append("%"))) return false; 107 p = percent + 2; 108 continue; 109 } 110 } 111 return true; 112 } 113 114 // Always returns true, or fails to compile in a constexpr context if s does not 115 // point to a constexpr char array. 116 constexpr bool EnsureConstexpr(string_view s) { 117 return s.empty() || s[0] == s[0]; 118 } 119 120 class ParsedFormatBase { 121 public: 122 explicit ParsedFormatBase( 123 string_view format, bool allow_ignored, 124 std::initializer_list<FormatConversionCharSet> convs); 125 126 ParsedFormatBase(const ParsedFormatBase& other) { *this = other; } 127 128 ParsedFormatBase(ParsedFormatBase&& other) { *this = std::move(other); } 129 130 ParsedFormatBase& operator=(const ParsedFormatBase& other) { 131 if (this == &other) return *this; 132 has_error_ = other.has_error_; 133 items_ = other.items_; 134 size_t text_size = items_.empty() ? 0 : items_.back().text_end; 135 data_ = std::make_unique<char[]>(text_size); 136 if (text_size > 0) { 137 memcpy(data_.get(), other.data_.get(), text_size); 138 } 139 return *this; 140 } 141 142 ParsedFormatBase& operator=(ParsedFormatBase&& other) { 143 if (this == &other) return *this; 144 has_error_ = other.has_error_; 145 data_ = std::move(other.data_); 146 items_ = std::move(other.items_); 147 // Reset the vector to make sure the invariants hold. 148 other.items_.clear(); 149 return *this; 150 } 151 152 template <typename Consumer> 153 bool ProcessFormat(Consumer consumer) const { 154 const char* const base = data_.get(); 155 string_view text(base, 0); 156 for (const auto& item : items_) { 157 const char* const end = text.data() + text.size(); 158 text = 159 string_view(end, static_cast<size_t>((base + item.text_end) - end)); 160 if (item.is_conversion) { 161 if (!consumer.ConvertOne(item.conv, text)) return false; 162 } else { 163 if (!consumer.Append(text)) return false; 164 } 165 } 166 return !has_error_; 167 } 168 169 bool has_error() const { return has_error_; } 170 171 private: 172 // Returns whether the conversions match and if !allow_ignored it verifies 173 // that all conversions are used by the format. 174 bool MatchesConversions( 175 bool allow_ignored, 176 std::initializer_list<FormatConversionCharSet> convs) const; 177 178 struct ParsedFormatConsumer; 179 180 struct ConversionItem { 181 bool is_conversion; 182 // Points to the past-the-end location of this element in the data_ array. 183 size_t text_end; 184 UnboundConversion conv; 185 }; 186 187 bool has_error_; 188 std::unique_ptr<char[]> data_; 189 std::vector<ConversionItem> items_; 190 }; 191 192 193 // A value type representing a preparsed format. These can be created, copied 194 // around, and reused to speed up formatting loops. 195 // The user must specify through the template arguments the conversion 196 // characters used in the format. This will be checked at compile time. 197 // 198 // This class uses Conv enum values to specify each argument. 199 // This allows for more flexibility as you can specify multiple possible 200 // conversion characters for each argument. 201 // ParsedFormat<char...> is a simplified alias for when the user only 202 // needs to specify a single conversion character for each argument. 203 // 204 // Example: 205 // // Extended format supports multiple characters per argument: 206 // using MyFormat = ExtendedParsedFormat<Conv::d | Conv::x>; 207 // MyFormat GetFormat(bool use_hex) { 208 // if (use_hex) return MyFormat("foo %x bar"); 209 // return MyFormat("foo %d bar"); 210 // } 211 // // 'format' can be used with any value that supports 'd' and 'x', 212 // // like `int`. 213 // auto format = GetFormat(use_hex); 214 // value = StringF(format, i); 215 // 216 // This class also supports runtime format checking with the ::New() and 217 // ::NewAllowIgnored() factory functions. 218 // This is the only API that allows the user to pass a runtime specified format 219 // string. These factory functions will return NULL if the format does not match 220 // the conversions requested by the user. 221 template <FormatConversionCharSet... C> 222 class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase { 223 public: 224 explicit ExtendedParsedFormat(string_view format) 225 #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER 226 __attribute__(( 227 enable_if(str_format_internal::EnsureConstexpr(format), 228 "Format string is not constexpr."), 229 enable_if(str_format_internal::ValidFormatImpl<C...>(format), 230 "Format specified does not match the template arguments."))) 231 #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER 232 : ExtendedParsedFormat(format, false) { 233 } 234 235 // ExtendedParsedFormat factory function. 236 // The user still has to specify the conversion characters, but they will not 237 // be checked at compile time. Instead, it will be checked at runtime. 238 // This delays the checking to runtime, but allows the user to pass 239 // dynamically sourced formats. 240 // It returns NULL if the format does not match the conversion characters. 241 // The user is responsible for checking the return value before using it. 242 // 243 // The 'New' variant will check that all the specified arguments are being 244 // consumed by the format and return NULL if any argument is being ignored. 245 // The 'NewAllowIgnored' variant will not verify this and will allow formats 246 // that ignore arguments. 247 static std::unique_ptr<ExtendedParsedFormat> New(string_view format) { 248 return New(format, false); 249 } 250 static std::unique_ptr<ExtendedParsedFormat> NewAllowIgnored( 251 string_view format) { 252 return New(format, true); 253 } 254 255 private: 256 static std::unique_ptr<ExtendedParsedFormat> New(string_view format, 257 bool allow_ignored) { 258 std::unique_ptr<ExtendedParsedFormat> conv( 259 new ExtendedParsedFormat(format, allow_ignored)); 260 if (conv->has_error()) return nullptr; 261 return conv; 262 } 263 264 ExtendedParsedFormat(string_view s, bool allow_ignored) 265 : ParsedFormatBase(s, allow_ignored, {C...}) {} 266 }; 267 } // namespace str_format_internal 268 ABSL_NAMESPACE_END 269 } // namespace absl 270 271 #endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_