tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

str_split.h (21602B)


      1 //
      2 // Copyright 2017 The Abseil Authors.
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      https://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 //
     16 // -----------------------------------------------------------------------------
     17 // File: str_split.h
     18 // -----------------------------------------------------------------------------
     19 //
     20 // This file contains functions for splitting strings. It defines the main
     21 // `StrSplit()` function, several delimiters for determining the boundaries on
     22 // which to split the string, and predicates for filtering delimited results.
     23 // `StrSplit()` adapts the returned collection to the type specified by the
     24 // caller.
     25 //
     26 // Example:
     27 //
     28 //   // Splits the given string on commas. Returns the results in a
     29 //   // vector of strings.
     30 //   std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
     31 //   // Can also use ","
     32 //   // v[0] == "a", v[1] == "b", v[2] == "c"
     33 //
     34 // See StrSplit() below for more information.
     35 #ifndef ABSL_STRINGS_STR_SPLIT_H_
     36 #define ABSL_STRINGS_STR_SPLIT_H_
     37 
     38 #include <algorithm>
     39 #include <cstddef>
     40 #include <map>
     41 #include <set>
     42 #include <string>
     43 #include <utility>
     44 #include <vector>
     45 
     46 #include "absl/base/internal/raw_logging.h"
     47 #include "absl/base/macros.h"
     48 #include "absl/strings/internal/str_split_internal.h"
     49 #include "absl/strings/string_view.h"
     50 #include "absl/strings/strip.h"
     51 
     52 namespace absl {
     53 ABSL_NAMESPACE_BEGIN
     54 
     55 //------------------------------------------------------------------------------
     56 // Delimiters
     57 //------------------------------------------------------------------------------
     58 //
     59 // `StrSplit()` uses delimiters to define the boundaries between elements in the
     60 // provided input. Several `Delimiter` types are defined below. If a string
     61 // (`const char*`, `std::string`, or `absl::string_view`) is passed in place of
     62 // an explicit `Delimiter` object, `StrSplit()` treats it the same way as if it
     63 // were passed a `ByString` delimiter.
     64 //
     65 // A `Delimiter` is an object with a `Find()` function that knows how to find
     66 // the first occurrence of itself in a given `absl::string_view`.
     67 //
     68 // The following `Delimiter` types are available for use within `StrSplit()`:
     69 //
     70 //   - `ByString` (default for string arguments)
     71 //   - `ByChar` (default for a char argument)
     72 //   - `ByAnyChar`
     73 //   - `ByLength`
     74 //   - `MaxSplits`
     75 //
     76 // A Delimiter's `Find()` member function will be passed an input `text` that is
     77 // to be split and a position (`pos`) to begin searching for the next delimiter
     78 // in `text`. The returned absl::string_view should refer to the next occurrence
     79 // (after `pos`) of the represented delimiter; this returned absl::string_view
     80 // represents the next location where the input `text` should be broken.
     81 //
     82 // The returned absl::string_view may be zero-length if the Delimiter does not
     83 // represent a part of the string (e.g., a fixed-length delimiter). If no
     84 // delimiter is found in the input `text`, a zero-length absl::string_view
     85 // referring to `text.end()` should be returned (e.g.,
     86 // `text.substr(text.size())`). It is important that the returned
     87 // absl::string_view always be within the bounds of the input `text` given as an
     88 // argument--it must not refer to a string that is physically located outside of
     89 // the given string.
     90 //
     91 // The following example is a simple Delimiter object that is created with a
     92 // single char and will look for that char in the text passed to the `Find()`
     93 // function:
     94 //
     95 //   struct SimpleDelimiter {
     96 //     const char c_;
     97 //     explicit SimpleDelimiter(char c) : c_(c) {}
     98 //     absl::string_view Find(absl::string_view text, size_t pos) {
     99 //       auto found = text.find(c_, pos);
    100 //       if (found == absl::string_view::npos)
    101 //         return text.substr(text.size());
    102 //
    103 //       return text.substr(found, 1);
    104 //     }
    105 //   };
    106 
    107 // ByString
    108 //
    109 // A sub-string delimiter. If `StrSplit()` is passed a string in place of a
    110 // `Delimiter` object, the string will be implicitly converted into a
    111 // `ByString` delimiter.
    112 //
    113 // Example:
    114 //
    115 //   // Because a string literal is converted to an `absl::ByString`,
    116 //   // the following two splits are equivalent.
    117 //
    118 //   std::vector<std::string> v1 = absl::StrSplit("a, b, c", ", ");
    119 //
    120 //   using absl::ByString;
    121 //   std::vector<std::string> v2 = absl::StrSplit("a, b, c",
    122 //                                                ByString(", "));
    123 //   // v[0] == "a", v[1] == "b", v[2] == "c"
    124 class ByString {
    125 public:
    126  explicit ByString(absl::string_view sp);
    127  absl::string_view Find(absl::string_view text, size_t pos) const;
    128 
    129 private:
    130  const std::string delimiter_;
    131 };
    132 
    133 // ByAsciiWhitespace
    134 //
    135 // A sub-string delimiter that splits by ASCII whitespace
    136 // (space, tab, vertical tab, formfeed, linefeed, or carriage return).
    137 // Note: you probably want to use absl::SkipEmpty() as well!
    138 //
    139 // This class is equivalent to ByAnyChar with ASCII whitespace chars.
    140 //
    141 // Example:
    142 //
    143 //   std::vector<std::string> v = absl::StrSplit(
    144 //       "a b\tc\n  d  \n", absl::ByAsciiWhitespace(), absl::SkipEmpty());
    145 //   // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d"
    146 class ByAsciiWhitespace {
    147 public:
    148  absl::string_view Find(absl::string_view text, size_t pos) const;
    149 };
    150 
    151 // ByChar
    152 //
    153 // A single character delimiter. `ByChar` is functionally equivalent to a
    154 // 1-char string within a `ByString` delimiter, but slightly more efficient.
    155 //
    156 // Example:
    157 //
     158 //   // Because a char literal is converted to an absl::ByChar,
    159 //   // the following two splits are equivalent.
    160 //   std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');
    161 //   using absl::ByChar;
    162 //   std::vector<std::string> v2 = absl::StrSplit("a,b,c", ByChar(','));
    163 //   // v[0] == "a", v[1] == "b", v[2] == "c"
    164 //
    165 // `ByChar` is also the default delimiter if a single character is given
    166 // as the delimiter to `StrSplit()`. For example, the following calls are
    167 // equivalent:
    168 //
    169 //   std::vector<std::string> v = absl::StrSplit("a-b", '-');
    170 //
    171 //   using absl::ByChar;
    172 //   std::vector<std::string> v = absl::StrSplit("a-b", ByChar('-'));
    173 //
    174 class ByChar {
    175 public:
    176  explicit ByChar(char c) : c_(c) {}
    177  absl::string_view Find(absl::string_view text, size_t pos) const;
    178 
    179 private:
    180  char c_;
    181 };
    182 
    183 // ByAnyChar
    184 //
    185 // A delimiter that will match any of the given byte-sized characters within
    186 // its provided string.
    187 //
    188 // Note: this delimiter works with single-byte string data, but does not work
    189 // with variable-width encodings, such as UTF-8.
    190 //
    191 // Example:
    192 //
    193 //   using absl::ByAnyChar;
    194 //   std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
    195 //   // v[0] == "a", v[1] == "b", v[2] == "c"
    196 //
    197 // If `ByAnyChar` is given the empty string, it behaves exactly like
    198 // `ByString` and matches each individual character in the input string.
    199 //
    200 class ByAnyChar {
    201 public:
    202  explicit ByAnyChar(absl::string_view sp);
    203  absl::string_view Find(absl::string_view text, size_t pos) const;
    204 
    205 private:
    206  const std::string delimiters_;
    207 };
    208 
    209 // ByLength
    210 //
    211 // A delimiter for splitting into equal-length strings. The length argument to
    212 // the constructor must be greater than 0.
    213 //
    214 // Note: this delimiter works with single-byte string data, but does not work
    215 // with variable-width encodings, such as UTF-8.
    216 //
    217 // Example:
    218 //
    219 //   using absl::ByLength;
    220 //   std::vector<std::string> v = absl::StrSplit("123456789", ByLength(3));
     221 //
    222 //   // v[0] == "123", v[1] == "456", v[2] == "789"
    223 //
    224 // Note that the string does not have to be a multiple of the fixed split
    225 // length. In such a case, the last substring will be shorter.
    226 //
    227 //   using absl::ByLength;
    228 //   std::vector<std::string> v = absl::StrSplit("12345", ByLength(2));
    229 //
    230 //   // v[0] == "12", v[1] == "34", v[2] == "5"
    231 class ByLength {
    232 public:
    233  explicit ByLength(ptrdiff_t length);
    234  absl::string_view Find(absl::string_view text, size_t pos) const;
    235 
    236 private:
    237  const ptrdiff_t length_;
    238 };
    239 
    240 namespace strings_internal {
    241 
    242 // A traits-like metafunction for selecting the default Delimiter object type
    243 // for a particular Delimiter type. The base case simply exposes type Delimiter
    244 // itself as the delimiter's Type. However, there are specializations for
    245 // string-like objects that map them to the ByString delimiter object.
     246 // This allows functions like absl::StrSplit() and absl::MaxSplits() to accept
     247 // string-like objects (e.g., ",") as if a ByString delimiter was given, and a
     248 // single char (e.g., ',') as if a ByChar delimiter was given.
    249 template <typename Delimiter>
    250 struct SelectDelimiter {
    251  using type = Delimiter;
    252 };
    253 
    254 template <>
    255 struct SelectDelimiter<char> {
    256  using type = ByChar;
    257 };
    258 template <>
    259 struct SelectDelimiter<char*> {
    260  using type = ByString;
    261 };
    262 template <>
    263 struct SelectDelimiter<const char*> {
    264  using type = ByString;
    265 };
    266 template <>
    267 struct SelectDelimiter<absl::string_view> {
    268  using type = ByString;
    269 };
    270 template <>
    271 struct SelectDelimiter<std::string> {
    272  using type = ByString;
    273 };
    274 
    275 // Wraps another delimiter and sets a max number of matches for that delimiter.
    276 template <typename Delimiter>
    277 class MaxSplitsImpl {
    278 public:
    279  MaxSplitsImpl(Delimiter delimiter, int limit)
    280      : delimiter_(delimiter), limit_(limit), count_(0) {}
    281  absl::string_view Find(absl::string_view text, size_t pos) {
    282    if (count_++ == limit_) {
    283      return absl::string_view(text.data() + text.size(),
    284                               0);  // No more matches.
    285    }
    286    return delimiter_.Find(text, pos);
    287  }
    288 
    289 private:
    290  Delimiter delimiter_;
    291  const int limit_;
    292  int count_;
    293 };
    294 
    295 }  // namespace strings_internal
    296 
    297 // MaxSplits()
    298 //
    299 // A delimiter that limits the number of matches which can occur to the passed
    300 // `limit`. The last element in the returned collection will contain all
    301 // remaining unsplit pieces, which may contain instances of the delimiter.
    302 // The collection will contain at most `limit` + 1 elements.
    303 // Example:
    304 //
    305 //   using absl::MaxSplits;
    306 //   std::vector<std::string> v = absl::StrSplit("a,b,c", MaxSplits(',', 1));
    307 //
    308 //   // v[0] == "a", v[1] == "b,c"
    309 template <typename Delimiter>
    310 inline strings_internal::MaxSplitsImpl<
    311    typename strings_internal::SelectDelimiter<Delimiter>::type>
    312 MaxSplits(Delimiter delimiter, int limit) {
    313  typedef
    314      typename strings_internal::SelectDelimiter<Delimiter>::type DelimiterType;
    315  return strings_internal::MaxSplitsImpl<DelimiterType>(
    316      DelimiterType(delimiter), limit);
    317 }
    318 
    319 //------------------------------------------------------------------------------
    320 // Predicates
    321 //------------------------------------------------------------------------------
    322 //
    323 // Predicates filter the results of a `StrSplit()` by determining whether or not
    324 // a resultant element is included in the result set. A predicate may be passed
    325 // as an optional third argument to the `StrSplit()` function.
    326 //
    327 // Predicates are unary functions (or functors) that take a single
    328 // `absl::string_view` argument and return a bool indicating whether the
    329 // argument should be included (`true`) or excluded (`false`).
    330 //
    331 // Predicates are useful when filtering out empty substrings. By default, empty
    332 // substrings may be returned by `StrSplit()`, which is similar to the way split
    333 // functions work in other programming languages.
    334 
    335 // AllowEmpty()
    336 //
    337 // Always returns `true`, indicating that all strings--including empty
    338 // strings--should be included in the split output. This predicate is not
    339 // strictly needed because this is the default behavior of `StrSplit()`;
    340 // however, it might be useful at some call sites to make the intent explicit.
    341 //
    342 // Example:
    343 //
    344 //  std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', AllowEmpty());
    345 //
     346 //  // v[0] == " a ", v[1] == " ", v[2] == "", v[3] == "b", v[4] == ""
    347 struct AllowEmpty {
    348  bool operator()(absl::string_view) const { return true; }
    349 };
    350 
    351 // SkipEmpty()
    352 //
    353 // Returns `false` if the given `absl::string_view` is empty, indicating that
    354 // `StrSplit()` should omit the empty string.
    355 //
    356 // Example:
    357 //
    358 //   std::vector<std::string> v = absl::StrSplit(",a,,b,", ',', SkipEmpty());
    359 //
    360 //   // v[0] == "a", v[1] == "b"
    361 //
    362 // Note: `SkipEmpty()` does not consider a string containing only whitespace
    363 // to be empty. To skip such whitespace as well, use the `SkipWhitespace()`
    364 // predicate.
    365 struct SkipEmpty {
    366  bool operator()(absl::string_view sp) const { return !sp.empty(); }
    367 };
    368 
    369 // SkipWhitespace()
    370 //
    371 // Returns `false` if the given `absl::string_view` is empty *or* contains only
    372 // whitespace, indicating that `StrSplit()` should omit the string.
    373 //
    374 // Example:
    375 //
    376 //   std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
    377 //                                               ',', SkipWhitespace());
    378 //   // v[0] == " a ", v[1] == "b"
    379 //
    380 //   // SkipEmpty() would return whitespace elements
    381 //   std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', SkipEmpty());
    382 //   // v[0] == " a ", v[1] == " ", v[2] == "b"
    383 struct SkipWhitespace {
    384  bool operator()(absl::string_view sp) const {
    385    sp = absl::StripAsciiWhitespace(sp);
    386    return !sp.empty();
    387  }
    388 };
    389 
    390 template <typename T>
    391 using EnableSplitIfString =
    392    typename std::enable_if<std::is_same<T, std::string>::value ||
    393                            std::is_same<T, const std::string>::value,
    394                            int>::type;
    395 
    396 //------------------------------------------------------------------------------
    397 //                                  StrSplit()
    398 //------------------------------------------------------------------------------
    399 
    400 // StrSplit()
    401 //
    402 // Splits a given string based on the provided `Delimiter` object, returning the
    403 // elements within the type specified by the caller. Optionally, you may pass a
    404 // `Predicate` to `StrSplit()` indicating whether to include or exclude the
    405 // resulting element within the final result set. (See the overviews for
    406 // Delimiters and Predicates above.)
    407 //
    408 // Example:
    409 //
    410 //   std::vector<std::string> v = absl::StrSplit("a,b,c,d", ',');
    411 //   // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d"
    412 //
    413 // You can also provide an explicit `Delimiter` object:
    414 //
    415 // Example:
    416 //
    417 //   using absl::ByAnyChar;
    418 //   std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
    419 //   // v[0] == "a", v[1] == "b", v[2] == "c"
    420 //
    421 // See above for more information on delimiters.
    422 //
    423 // By default, empty strings are included in the result set. You can optionally
    424 // include a third `Predicate` argument to apply a test for whether the
    425 // resultant element should be included in the result set:
    426 //
    427 // Example:
    428 //
    429 //   std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
    430 //                                               ',', SkipWhitespace());
    431 //   // v[0] == " a ", v[1] == "b"
    432 //
    433 // See above for more information on predicates.
    434 //
    435 //------------------------------------------------------------------------------
    436 // StrSplit() Return Types
    437 //------------------------------------------------------------------------------
    438 //
    439 // The `StrSplit()` function adapts the returned collection to the collection
    440 // specified by the caller (e.g. `std::vector` above). The returned collections
    441 // may contain `std::string`, `absl::string_view` (in which case the original
    442 // string being split must ensure that it outlives the collection), or any
    443 // object that can be explicitly created from an `absl::string_view`. This
    444 // behavior works for:
    445 //
    446 // 1) All standard STL containers including `std::vector`, `std::list`,
     447 //    `std::deque`, `std::set`, `std::multiset`, `std::map`, and `std::multimap`.
    448 // 2) `std::pair` (which is not actually a container). See below.
    449 // 3) `std::array`, which is a container but has different behavior due to its
    450 //    fixed size. See below.
    451 //
    452 // Example:
    453 //
    454 //   // The results are returned as `absl::string_view` objects. Note that we
    455 //   // have to ensure that the input string outlives any results.
    456 //   std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
    457 //
    458 //   // Stores results in a std::set<std::string>, which also performs
    459 //   // de-duplication and orders the elements in ascending order.
    460 //   std::set<std::string> a = absl::StrSplit("b,a,c,a,b", ',');
     461 //   // a == {"a", "b", "c"}
    462 //
    463 //   // `StrSplit()` can be used within a range-based for loop, in which case
    464 //   // each element will be of type `absl::string_view`.
    465 //   std::vector<std::string> v;
    466 //   for (const auto sv : absl::StrSplit("a,b,c", ',')) {
    467 //     if (sv != "b") v.emplace_back(sv);
    468 //   }
    469 //   // v[0] == "a", v[1] == "c"
    470 //
    471 //   // Stores results in a map. The map implementation assumes that the input
    472 //   // is provided as a series of key/value pairs. For example, the 0th element
    473 //   // resulting from the split will be stored as a key to the 1st element. If
    474 //   // an odd number of elements are resolved, the last element is paired with
    475 //   // a default-constructed value (e.g., empty string).
    476 //   std::map<std::string, std::string> m = absl::StrSplit("a,b,c", ',');
    477 //   // m["a"] == "b", m["c"] == ""     // last component value equals ""
    478 //
    479 // Splitting to `std::pair` is an interesting case because it can hold only two
    480 // elements and is not a collection type. When splitting to a `std::pair` the
    481 // first two split strings become the `std::pair` `.first` and `.second`
    482 // members, respectively. The remaining split substrings are discarded. If there
     483 // are fewer than two split substrings, the empty string is used for the
    484 // corresponding `std::pair` member.
    485 //
    486 // Example:
    487 //
    488 //   // Stores first two split strings as the members in a std::pair.
    489 //   std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
    490 //   // p.first == "a", p.second == "b"       // "c" is omitted.
    491 //
    492 //
    493 // Splitting to `std::array` is similar to splitting to `std::pair`, but for
    494 // N elements instead of two; missing elements are filled with the empty string
    495 // and extra elements are discarded.
    496 //
    497 // Examples:
    498 //
    499 //   // Stores first two split strings as the elements in a std::array.
    500 //   std::array<std::string, 2> a = absl::StrSplit("a,b,c", ',');
    501 //   // a[0] == "a", a[1] == "b"   // "c" is omitted.
    502 //
    503 //   // The second element is empty.
    504 //   std::array<std::string, 2> a = absl::StrSplit("a,", ',');
    505 //   // a[0] == "a", a[1] == ""
    506 //
    507 // The `StrSplit()` function can be used multiple times to perform more
    508 // complicated splitting logic, such as intelligently parsing key-value pairs.
    509 //
    510 // Example:
    511 //
    512 //   // The input string "a=b=c,d=e,f=,g" becomes
    513 //   // { "a" => "b=c", "d" => "e", "f" => "", "g" => "" }
    514 //   std::map<std::string, std::string> m;
    515 //   for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
    516 //     m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
    517 //   }
    518 //   EXPECT_EQ("b=c", m.find("a")->second);
    519 //   EXPECT_EQ("e", m.find("d")->second);
    520 //   EXPECT_EQ("", m.find("f")->second);
    521 //   EXPECT_EQ("", m.find("g")->second);
    522 //
    523 // WARNING: Due to a legacy bug that is maintained for backward compatibility,
    524 // splitting the following empty string_views produces different results:
    525 //
    526 //   absl::StrSplit(absl::string_view(""), '-');  // {""}
    527 //   absl::StrSplit(absl::string_view(), '-');    // {}, but should be {""}
    528 //
    529 // Try not to depend on this distinction because the bug may one day be fixed.
    530 template <typename Delimiter>
    531 strings_internal::Splitter<
    532    typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty,
    533    absl::string_view>
    534 StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d) {
    535  using DelimiterType =
    536      typename strings_internal::SelectDelimiter<Delimiter>::type;
    537  return strings_internal::Splitter<DelimiterType, AllowEmpty,
    538                                    absl::string_view>(
    539      text.value(), DelimiterType(d), AllowEmpty());
    540 }
    541 
    542 template <typename Delimiter, typename StringType,
    543          EnableSplitIfString<StringType> = 0>
    544 strings_internal::Splitter<
    545    typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty,
    546    std::string>
    547 StrSplit(StringType&& text, Delimiter d) {
    548  using DelimiterType =
    549      typename strings_internal::SelectDelimiter<Delimiter>::type;
    550  return strings_internal::Splitter<DelimiterType, AllowEmpty, std::string>(
    551      std::move(text), DelimiterType(d), AllowEmpty());
    552 }
    553 
    554 template <typename Delimiter, typename Predicate>
    555 strings_internal::Splitter<
    556    typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate,
    557    absl::string_view>
    558 StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d,
    559         Predicate p) {
    560  using DelimiterType =
    561      typename strings_internal::SelectDelimiter<Delimiter>::type;
    562  return strings_internal::Splitter<DelimiterType, Predicate,
    563                                    absl::string_view>(
    564      text.value(), DelimiterType(std::move(d)), std::move(p));
    565 }
    566 
    567 template <typename Delimiter, typename Predicate, typename StringType,
    568          EnableSplitIfString<StringType> = 0>
    569 strings_internal::Splitter<
    570    typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate,
    571    std::string>
    572 StrSplit(StringType&& text, Delimiter d, Predicate p) {
    573  using DelimiterType =
    574      typename strings_internal::SelectDelimiter<Delimiter>::type;
    575  return strings_internal::Splitter<DelimiterType, Predicate, std::string>(
    576      std::move(text), DelimiterType(d), std::move(p));
    577 }
    578 
    579 ABSL_NAMESPACE_END
    580 }  // namespace absl
    581 
    582 #endif  // ABSL_STRINGS_STR_SPLIT_H_