str_split_test.cc (35319B)
1 // Copyright 2017 The Abseil Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "absl/strings/str_split.h" 16 17 #include <array> 18 #include <cstddef> 19 #include <cstdint> 20 #include <deque> 21 #include <initializer_list> 22 #include <list> 23 #include <map> 24 #include <memory> 25 #include <set> 26 #include <string> 27 #include <unordered_map> 28 #include <unordered_set> 29 #include <utility> 30 #include <vector> 31 32 #include "gmock/gmock.h" 33 #include "gtest/gtest.h" 34 #include "absl/base/macros.h" 35 #include "absl/container/btree_map.h" 36 #include "absl/container/btree_set.h" 37 #include "absl/container/flat_hash_map.h" 38 #include "absl/container/node_hash_map.h" 39 #include "absl/strings/string_view.h" 40 41 namespace { 42 43 using ::testing::ElementsAre; 44 using ::testing::IsEmpty; 45 using ::testing::Pair; 46 using ::testing::UnorderedElementsAre; 47 48 TEST(Split, TraitsTest) { 49 static_assert(!absl::strings_internal::SplitterIsConvertibleTo<int>::value, 50 ""); 51 static_assert( 52 !absl::strings_internal::SplitterIsConvertibleTo<std::string>::value, ""); 53 static_assert(absl::strings_internal::SplitterIsConvertibleTo< 54 std::vector<std::string>>::value, 55 ""); 56 static_assert( 57 !absl::strings_internal::SplitterIsConvertibleTo<std::vector<int>>::value, 58 ""); 59 static_assert(absl::strings_internal::SplitterIsConvertibleTo< 60 std::vector<absl::string_view>>::value, 61 ""); 62 static_assert(absl::strings_internal::SplitterIsConvertibleTo< 63 std::map<std::string, std::string>>::value, 64 ""); 65 static_assert(absl::strings_internal::SplitterIsConvertibleTo< 66 std::map<absl::string_view, absl::string_view>>::value, 67 ""); 68 static_assert(!absl::strings_internal::SplitterIsConvertibleTo< 69 std::map<int, std::string>>::value, 70 ""); 71 static_assert(!absl::strings_internal::SplitterIsConvertibleTo< 72 std::map<std::string, int>>::value, 73 ""); 74 } 75 76 // This tests the overall split API, which is made up of the absl::StrSplit() 77 // function and the Delimiter objects in the absl:: namespace. 78 // This TEST macro is outside of any namespace to require full specification of 79 // namespaces just like callers will need to use. 80 TEST(Split, APIExamples) { 81 { 82 // Passes string delimiter. Assumes the default of ByString. 83 std::vector<std::string> v = absl::StrSplit("a,b,c", ","); // NOLINT 84 EXPECT_THAT(v, ElementsAre("a", "b", "c")); 85 86 // Equivalent to... 87 using absl::ByString; 88 v = absl::StrSplit("a,b,c", ByString(",")); 89 EXPECT_THAT(v, ElementsAre("a", "b", "c")); 90 91 // Equivalent to... 92 EXPECT_THAT(absl::StrSplit("a,b,c", ByString(",")), 93 ElementsAre("a", "b", "c")); 94 } 95 96 { 97 // Same as above, but using a single character as the delimiter. 98 std::vector<std::string> v = absl::StrSplit("a,b,c", ','); 99 EXPECT_THAT(v, ElementsAre("a", "b", "c")); 100 101 // Equivalent to... 102 using absl::ByChar; 103 v = absl::StrSplit("a,b,c", ByChar(',')); 104 EXPECT_THAT(v, ElementsAre("a", "b", "c")); 105 } 106 107 { 108 // Uses the Literal string "=>" as the delimiter. 109 const std::vector<std::string> v = absl::StrSplit("a=>b=>c", "=>"); 110 EXPECT_THAT(v, ElementsAre("a", "b", "c")); 111 } 112 113 { 114 // The substrings are returned as string_views, eliminating copying. 115 std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ','); 116 EXPECT_THAT(v, ElementsAre("a", "b", "c")); 117 } 118 119 { 120 // Leading and trailing empty substrings. 121 std::vector<std::string> v = absl::StrSplit(",a,b,c,", ','); 122 EXPECT_THAT(v, ElementsAre("", "a", "b", "c", "")); 123 } 124 125 { 126 // Splits on a delimiter that is not found. 127 std::vector<std::string> v = absl::StrSplit("abc", ','); 128 EXPECT_THAT(v, ElementsAre("abc")); 129 } 130 131 { 132 // Splits the input string into individual characters by using an empty 133 // string as the delimiter. 134 std::vector<std::string> v = absl::StrSplit("abc", ""); 135 EXPECT_THAT(v, ElementsAre("a", "b", "c")); 136 } 137 138 { 139 // Splits string data with embedded NUL characters, using NUL as the 140 // delimiter. A simple delimiter of "\0" doesn't work because strlen() will 141 // say that's the empty string when constructing the absl::string_view 142 // delimiter. Instead, a non-empty string containing NUL can be used as the 143 // delimiter. 144 std::string embedded_nulls("a\0b\0c", 5); 145 std::string null_delim("\0", 1); 146 std::vector<std::string> v = absl::StrSplit(embedded_nulls, null_delim); 147 EXPECT_THAT(v, ElementsAre("a", "b", "c")); 148 } 149 150 { 151 // Stores first two split strings as the members in a std::pair. 152 std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ','); 153 EXPECT_EQ("a", p.first); 154 EXPECT_EQ("b", p.second); 155 // "c" is omitted because std::pair can hold only two elements. 156 } 157 158 { 159 // Results stored in std::set<std::string> 160 std::set<std::string> v = absl::StrSplit("a,b,c,a,b,c,a,b,c", ','); 161 EXPECT_THAT(v, ElementsAre("a", "b", "c")); 162 } 163 164 { 165 // Uses a non-const char* delimiter. 166 char a[] = ","; 167 char* d = a + 0; 168 std::vector<std::string> v = absl::StrSplit("a,b,c", d); 169 EXPECT_THAT(v, ElementsAre("a", "b", "c")); 170 } 171 172 { 173 // Results split using either of , or ; 174 using absl::ByAnyChar; 175 std::vector<std::string> v = absl::StrSplit("a,b;c", ByAnyChar(",;")); 176 EXPECT_THAT(v, ElementsAre("a", "b", "c")); 177 } 178 179 { 180 // Uses the SkipWhitespace predicate. 181 using absl::SkipWhitespace; 182 std::vector<std::string> v = 183 absl::StrSplit(" a , ,,b,", ',', SkipWhitespace()); 184 EXPECT_THAT(v, ElementsAre(" a ", "b")); 185 } 186 187 { 188 // Uses the ByLength delimiter. 189 using absl::ByLength; 190 std::vector<std::string> v = absl::StrSplit("abcdefg", ByLength(3)); 191 EXPECT_THAT(v, ElementsAre("abc", "def", "g")); 192 } 193 194 { 195 // Different forms of initialization / conversion. 196 std::vector<std::string> v1 = absl::StrSplit("a,b,c", ','); 197 EXPECT_THAT(v1, ElementsAre("a", "b", "c")); 198 std::vector<std::string> v2(absl::StrSplit("a,b,c", ',')); 199 EXPECT_THAT(v2, ElementsAre("a", "b", "c")); 200 auto v3 = std::vector<std::string>(absl::StrSplit("a,b,c", ',')); 201 EXPECT_THAT(v3, ElementsAre("a", "b", "c")); 202 v3 = absl::StrSplit("a,b,c", ','); 203 EXPECT_THAT(v3, ElementsAre("a", "b", "c")); 204 } 205 206 { 207 // Results stored in a std::map. 208 std::map<std::string, std::string> m = absl::StrSplit("a,1,b,2,a,3", ','); 209 EXPECT_EQ(2, m.size()); 210 EXPECT_EQ("3", m["a"]); 211 EXPECT_EQ("2", m["b"]); 212 } 213 214 { 215 // Results stored in a std::multimap. 216 std::multimap<std::string, std::string> m = 217 absl::StrSplit("a,1,b,2,a,3", ','); 218 EXPECT_EQ(3, m.size()); 219 auto it = m.find("a"); 220 EXPECT_EQ("1", it->second); 221 ++it; 222 EXPECT_EQ("3", it->second); 223 it = m.find("b"); 224 EXPECT_EQ("2", it->second); 225 } 226 227 { 228 // Demonstrates use in a range-based for loop in C++11. 229 std::string s = "x,x,x,x,x,x,x"; 230 for (absl::string_view sp : absl::StrSplit(s, ',')) { 231 EXPECT_EQ("x", sp); 232 } 233 } 234 235 { 236 // Demonstrates use with a Predicate in a range-based for loop. 237 using absl::SkipWhitespace; 238 std::string s = " ,x,,x,,x,x,x,,"; 239 for (absl::string_view sp : absl::StrSplit(s, ',', SkipWhitespace())) { 240 EXPECT_EQ("x", sp); 241 } 242 } 243 244 { 245 // Demonstrates a "smart" split to std::map using two separate calls to 246 // absl::StrSplit. One call to split the records, and another call to split 247 // the keys and values. This also uses the Limit delimiter so that the 248 // std::string "a=b=c" will split to "a" -> "b=c". 249 std::map<std::string, std::string> m; 250 for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) { 251 m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1))); 252 } 253 EXPECT_EQ("b=c", m.find("a")->second); 254 EXPECT_EQ("e", m.find("d")->second); 255 EXPECT_EQ("", m.find("f")->second); 256 EXPECT_EQ("", m.find("g")->second); 257 } 258 } 259 260 // 261 // Tests for SplitIterator 262 // 263 264 TEST(SplitIterator, Basics) { 265 auto splitter = absl::StrSplit("a,b", ','); 266 auto it = splitter.begin(); 267 auto end = splitter.end(); 268 269 EXPECT_NE(it, end); 270 EXPECT_EQ("a", *it); // tests dereference 271 ++it; // tests preincrement 272 EXPECT_NE(it, end); 273 EXPECT_EQ("b", 274 std::string(it->data(), it->size())); // tests dereference as ptr 275 it++; // tests postincrement 276 EXPECT_EQ(it, end); 277 } 278 279 // Simple Predicate to skip a particular string. 280 class Skip { 281 public: 282 explicit Skip(const std::string& s) : s_(s) {} 283 bool operator()(absl::string_view sp) { return sp != s_; } 284 285 private: 286 std::string s_; 287 }; 288 289 TEST(SplitIterator, Predicate) { 290 auto splitter = absl::StrSplit("a,b,c", ',', Skip("b")); 291 auto it = splitter.begin(); 292 auto end = splitter.end(); 293 294 EXPECT_NE(it, end); 295 EXPECT_EQ("a", *it); // tests dereference 296 ++it; // tests preincrement -- "b" should be skipped here. 297 EXPECT_NE(it, end); 298 EXPECT_EQ("c", 299 std::string(it->data(), it->size())); // tests dereference as ptr 300 it++; // tests postincrement 301 EXPECT_EQ(it, end); 302 } 303 304 TEST(SplitIterator, EdgeCases) { 305 // Expected input and output, assuming a delimiter of ',' 306 struct { 307 std::string in; 308 std::vector<std::string> expect; 309 } specs[] = { 310 {"", {""}}, 311 {"foo", {"foo"}}, 312 {",", {"", ""}}, 313 {",foo", {"", "foo"}}, 314 {"foo,", {"foo", ""}}, 315 {",foo,", {"", "foo", ""}}, 316 {"foo,bar", {"foo", "bar"}}, 317 }; 318 319 for (const auto& spec : specs) { 320 SCOPED_TRACE(spec.in); 321 auto splitter = absl::StrSplit(spec.in, ','); 322 auto it = splitter.begin(); 323 auto end = splitter.end(); 324 for (const auto& expected : spec.expect) { 325 EXPECT_NE(it, end); 326 EXPECT_EQ(expected, *it++); 327 } 328 EXPECT_EQ(it, end); 329 } 330 } 331 332 TEST(Splitter, Const) { 333 const auto splitter = absl::StrSplit("a,b,c", ','); 334 EXPECT_THAT(splitter, ElementsAre("a", "b", "c")); 335 } 336 337 TEST(Split, EmptyAndNull) { 338 // Attention: Splitting a null absl::string_view is different than splitting 339 // an empty absl::string_view even though both string_views are considered 340 // equal. This behavior is likely surprising and undesirable. However, to 341 // maintain backward compatibility, there is a small "hack" in 342 // str_split_internal.h that preserves this behavior. If that behavior is ever 343 // changed/fixed, this test will need to be updated. 344 EXPECT_THAT(absl::StrSplit(absl::string_view(""), '-'), ElementsAre("")); 345 EXPECT_THAT(absl::StrSplit(absl::string_view(), '-'), ElementsAre()); 346 } 347 348 TEST(SplitIterator, EqualityAsEndCondition) { 349 auto splitter = absl::StrSplit("a,b,c", ','); 350 auto it = splitter.begin(); 351 auto it2 = it; 352 353 // Increments it2 twice to point to "c" in the input text. 354 ++it2; 355 ++it2; 356 EXPECT_EQ("c", *it2); 357 358 // This test uses a non-end SplitIterator as the terminating condition in a 359 // for loop. This relies on SplitIterator equality for non-end SplitIterators 360 // working correctly. At this point it2 points to "c", and we use that as the 361 // "end" condition in this test. 362 std::vector<absl::string_view> v; 363 for (; it != it2; ++it) { 364 v.push_back(*it); 365 } 366 EXPECT_THAT(v, ElementsAre("a", "b")); 367 } 368 369 // 370 // Tests for Splitter 371 // 372 373 TEST(Splitter, RangeIterators) { 374 auto splitter = absl::StrSplit("a,b,c", ','); 375 std::vector<absl::string_view> output; 376 for (absl::string_view p : splitter) { 377 output.push_back(p); 378 } 379 EXPECT_THAT(output, ElementsAre("a", "b", "c")); 380 } 381 382 // Some template functions for use in testing conversion operators 383 template <typename ContainerType, typename Splitter> 384 void TestConversionOperator(const Splitter& splitter) { 385 ContainerType output = splitter; 386 EXPECT_THAT(output, UnorderedElementsAre("a", "b", "c", "d")); 387 } 388 389 template <typename MapType, typename Splitter> 390 void TestMapConversionOperator(const Splitter& splitter) { 391 MapType m = splitter; 392 EXPECT_THAT(m, UnorderedElementsAre(Pair("a", "b"), Pair("c", "d"))); 393 } 394 395 template <typename FirstType, typename SecondType, typename Splitter> 396 void TestPairConversionOperator(const Splitter& splitter) { 397 std::pair<FirstType, SecondType> p = splitter; 398 EXPECT_EQ(p, (std::pair<FirstType, SecondType>("a", "b"))); 399 } 400 401 template <typename StringType, typename Splitter> 402 void TestArrayConversionOperator(const Splitter& splitter) { 403 std::array<StringType, 2> a = splitter; 404 EXPECT_THAT(a, ElementsAre("a", "b")); 405 } 406 407 TEST(Splitter, ConversionOperator) { 408 auto splitter = absl::StrSplit("a,b,c,d", ','); 409 410 TestConversionOperator<std::vector<absl::string_view>>(splitter); 411 TestConversionOperator<std::vector<std::string>>(splitter); 412 TestConversionOperator<std::list<absl::string_view>>(splitter); 413 TestConversionOperator<std::list<std::string>>(splitter); 414 TestConversionOperator<std::deque<absl::string_view>>(splitter); 415 TestConversionOperator<std::deque<std::string>>(splitter); 416 TestConversionOperator<std::set<absl::string_view>>(splitter); 417 TestConversionOperator<std::set<std::string>>(splitter); 418 TestConversionOperator<std::multiset<absl::string_view>>(splitter); 419 TestConversionOperator<std::multiset<std::string>>(splitter); 420 TestConversionOperator<absl::btree_set<absl::string_view>>(splitter); 421 TestConversionOperator<absl::btree_set<std::string>>(splitter); 422 TestConversionOperator<absl::btree_multiset<absl::string_view>>(splitter); 423 TestConversionOperator<absl::btree_multiset<std::string>>(splitter); 424 TestConversionOperator<std::unordered_set<std::string>>(splitter); 425 426 // Tests conversion to map-like objects. 427 428 TestMapConversionOperator<std::map<absl::string_view, absl::string_view>>( 429 splitter); 430 TestMapConversionOperator<std::map<absl::string_view, std::string>>(splitter); 431 TestMapConversionOperator<std::map<std::string, absl::string_view>>(splitter); 432 TestMapConversionOperator<std::map<std::string, std::string>>(splitter); 433 TestMapConversionOperator< 434 std::multimap<absl::string_view, absl::string_view>>(splitter); 435 TestMapConversionOperator<std::multimap<absl::string_view, std::string>>( 436 splitter); 437 TestMapConversionOperator<std::multimap<std::string, absl::string_view>>( 438 splitter); 439 TestMapConversionOperator<std::multimap<std::string, std::string>>(splitter); 440 TestMapConversionOperator< 441 absl::btree_map<absl::string_view, absl::string_view>>(splitter); 442 TestMapConversionOperator<absl::btree_map<absl::string_view, std::string>>( 443 splitter); 444 TestMapConversionOperator<absl::btree_map<std::string, absl::string_view>>( 445 splitter); 446 TestMapConversionOperator<absl::btree_map<std::string, std::string>>( 447 splitter); 448 TestMapConversionOperator< 449 absl::btree_multimap<absl::string_view, absl::string_view>>(splitter); 450 TestMapConversionOperator< 451 absl::btree_multimap<absl::string_view, std::string>>(splitter); 452 TestMapConversionOperator< 453 absl::btree_multimap<std::string, absl::string_view>>(splitter); 454 TestMapConversionOperator<absl::btree_multimap<std::string, std::string>>( 455 splitter); 456 TestMapConversionOperator<std::unordered_map<std::string, std::string>>( 457 splitter); 458 TestMapConversionOperator< 459 absl::node_hash_map<absl::string_view, absl::string_view>>(splitter); 460 TestMapConversionOperator< 461 absl::node_hash_map<absl::string_view, std::string>>(splitter); 462 TestMapConversionOperator< 463 absl::node_hash_map<std::string, absl::string_view>>(splitter); 464 TestMapConversionOperator< 465 absl::flat_hash_map<absl::string_view, absl::string_view>>(splitter); 466 TestMapConversionOperator< 467 absl::flat_hash_map<absl::string_view, std::string>>(splitter); 468 TestMapConversionOperator< 469 absl::flat_hash_map<std::string, absl::string_view>>(splitter); 470 471 // Tests conversion to std::pair 472 473 TestPairConversionOperator<absl::string_view, absl::string_view>(splitter); 474 TestPairConversionOperator<absl::string_view, std::string>(splitter); 475 TestPairConversionOperator<std::string, absl::string_view>(splitter); 476 TestPairConversionOperator<std::string, std::string>(splitter); 477 478 // Tests conversion to std::array 479 TestArrayConversionOperator<std::string>(splitter); 480 TestArrayConversionOperator<absl::string_view>(splitter); 481 } 482 483 // A few additional tests for conversion to std::pair. This conversion is 484 // different from others because a std::pair always has exactly two elements: 485 // .first and .second. The split has to work even when the split has 486 // less-than, equal-to, and more-than 2 strings. 487 TEST(Splitter, ToPair) { 488 { 489 // Empty string 490 std::pair<std::string, std::string> p = absl::StrSplit("", ','); 491 EXPECT_EQ("", p.first); 492 EXPECT_EQ("", p.second); 493 } 494 495 { 496 // Only first 497 std::pair<std::string, std::string> p = absl::StrSplit("a", ','); 498 EXPECT_EQ("a", p.first); 499 EXPECT_EQ("", p.second); 500 } 501 502 { 503 // Only second 504 std::pair<std::string, std::string> p = absl::StrSplit(",b", ','); 505 EXPECT_EQ("", p.first); 506 EXPECT_EQ("b", p.second); 507 } 508 509 { 510 // First and second. 511 std::pair<std::string, std::string> p = absl::StrSplit("a,b", ','); 512 EXPECT_EQ("a", p.first); 513 EXPECT_EQ("b", p.second); 514 } 515 516 { 517 // First and second and then more stuff that will be ignored. 518 std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ','); 519 EXPECT_EQ("a", p.first); 520 EXPECT_EQ("b", p.second); 521 // "c" is omitted. 522 } 523 } 524 525 // std::array tests similar to std::pair tests above, testing fewer, exactly, 526 // or more elements than the array size. 527 TEST(Splitter, ToArray) { 528 { 529 // Empty string 530 std::array<std::string, 2> p = absl::StrSplit("", ','); 531 EXPECT_THAT(p, ElementsAre("", "")); 532 } 533 534 { 535 // Only first 536 std::array<std::string, 2> p = absl::StrSplit("a", ','); 537 EXPECT_THAT(p, ElementsAre("a", "")); 538 } 539 540 { 541 // Only second 542 std::array<std::string, 2> p = absl::StrSplit(",b", ','); 543 EXPECT_THAT(p, ElementsAre("", "b")); 544 } 545 546 { 547 // First and second. 548 std::array<std::string, 2> p = absl::StrSplit("a,b", ','); 549 EXPECT_THAT(p, ElementsAre("a", "b")); 550 } 551 552 { 553 // First and second and then more stuff that will be ignored. 554 std::array<std::string, 2> p = absl::StrSplit("a,b,c", ','); 555 EXPECT_THAT(p, ElementsAre("a", "b")); 556 // "c" is omitted. 557 } 558 } 559 560 TEST(Splitter, Predicates) { 561 static const char kTestChars[] = ",a, ,b,"; 562 using absl::AllowEmpty; 563 using absl::SkipEmpty; 564 using absl::SkipWhitespace; 565 566 { 567 // No predicate. Does not skip empties. 568 auto splitter = absl::StrSplit(kTestChars, ','); 569 std::vector<std::string> v = splitter; 570 EXPECT_THAT(v, ElementsAre("", "a", " ", "b", "")); 571 } 572 573 { 574 // Allows empty strings. Same behavior as no predicate at all. 575 auto splitter = absl::StrSplit(kTestChars, ',', AllowEmpty()); 576 std::vector<std::string> v_allowempty = splitter; 577 EXPECT_THAT(v_allowempty, ElementsAre("", "a", " ", "b", "")); 578 579 // Ensures AllowEmpty equals the behavior with no predicate. 580 auto splitter_nopredicate = absl::StrSplit(kTestChars, ','); 581 std::vector<std::string> v_nopredicate = splitter_nopredicate; 582 EXPECT_EQ(v_allowempty, v_nopredicate); 583 } 584 585 { 586 // Skips empty strings. 587 auto splitter = absl::StrSplit(kTestChars, ',', SkipEmpty()); 588 std::vector<std::string> v = splitter; 589 EXPECT_THAT(v, ElementsAre("a", " ", "b")); 590 } 591 592 { 593 // Skips empty and all-whitespace strings. 594 auto splitter = absl::StrSplit(kTestChars, ',', SkipWhitespace()); 595 std::vector<std::string> v = splitter; 596 EXPECT_THAT(v, ElementsAre("a", "b")); 597 } 598 } 599 600 // 601 // Tests for StrSplit() 602 // 603 604 TEST(Split, Basics) { 605 { 606 // Doesn't really do anything useful because the return value is ignored, 607 // but it should work. 608 absl::StrSplit("a,b,c", ','); 609 } 610 611 { 612 std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ','); 613 EXPECT_THAT(v, ElementsAre("a", "b", "c")); 614 } 615 616 { 617 std::vector<std::string> v = absl::StrSplit("a,b,c", ','); 618 EXPECT_THAT(v, ElementsAre("a", "b", "c")); 619 } 620 621 { 622 // Ensures that assignment works. This requires a little extra work with 623 // C++11 because of overloads with initializer_list. 624 std::vector<std::string> v; 625 v = absl::StrSplit("a,b,c", ','); 626 627 EXPECT_THAT(v, ElementsAre("a", "b", "c")); 628 std::map<std::string, std::string> m; 629 m = absl::StrSplit("a,b,c", ','); 630 EXPECT_EQ(2, m.size()); 631 std::unordered_map<std::string, std::string> hm; 632 hm = absl::StrSplit("a,b,c", ','); 633 EXPECT_EQ(2, hm.size()); 634 } 635 } 636 637 absl::string_view ReturnStringView() { return "Hello World"; } 638 const char* ReturnConstCharP() { return "Hello World"; } 639 char* ReturnCharP() { return const_cast<char*>("Hello World"); } 640 641 TEST(Split, AcceptsCertainTemporaries) { 642 std::vector<std::string> v; 643 v = absl::StrSplit(ReturnStringView(), ' '); 644 EXPECT_THAT(v, ElementsAre("Hello", "World")); 645 v = absl::StrSplit(ReturnConstCharP(), ' '); 646 EXPECT_THAT(v, ElementsAre("Hello", "World")); 647 v = absl::StrSplit(ReturnCharP(), ' '); 648 EXPECT_THAT(v, ElementsAre("Hello", "World")); 649 } 650 651 TEST(Split, Temporary) { 652 // Use a std::string longer than the SSO length, so that when the temporary is 653 // destroyed, if the splitter keeps a reference to the string's contents, 654 // it'll reference freed memory instead of just dead on-stack memory. 655 const char input[] = "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u"; 656 EXPECT_LT(sizeof(std::string), ABSL_ARRAYSIZE(input)) 657 << "Input should be larger than fits on the stack."; 658 659 // This happens more often in C++11 as part of a range-based for loop. 660 auto splitter = absl::StrSplit(std::string(input), ','); 661 std::string expected = "a"; 662 for (absl::string_view letter : splitter) { 663 EXPECT_EQ(expected, letter); 664 ++expected[0]; 665 } 666 EXPECT_EQ("v", expected); 667 668 // This happens more often in C++11 as part of a range-based for loop. 669 auto std_splitter = absl::StrSplit(std::string(input), ','); 670 expected = "a"; 671 for (absl::string_view letter : std_splitter) { 672 EXPECT_EQ(expected, letter); 673 ++expected[0]; 674 } 675 EXPECT_EQ("v", expected); 676 } 677 678 template <typename T> 679 static std::unique_ptr<T> CopyToHeap(const T& value) { 680 return std::unique_ptr<T>(new T(value)); 681 } 682 683 TEST(Split, LvalueCaptureIsCopyable) { 684 std::string input = "a,b"; 685 auto heap_splitter = CopyToHeap(absl::StrSplit(input, ',')); 686 auto stack_splitter = *heap_splitter; 687 heap_splitter.reset(); 688 std::vector<std::string> result = stack_splitter; 689 EXPECT_THAT(result, testing::ElementsAre("a", "b")); 690 } 691 692 TEST(Split, TemporaryCaptureIsCopyable) { 693 auto heap_splitter = CopyToHeap(absl::StrSplit(std::string("a,b"), ',')); 694 auto stack_splitter = *heap_splitter; 695 heap_splitter.reset(); 696 std::vector<std::string> result = stack_splitter; 697 EXPECT_THAT(result, testing::ElementsAre("a", "b")); 698 } 699 700 TEST(Split, SplitterIsCopyableAndMoveable) { 701 auto a = absl::StrSplit("foo", '-'); 702 703 // Ensures that the following expressions compile. 704 auto b = a; // Copy construct 705 auto c = std::move(a); // Move construct 706 b = c; // Copy assign 707 c = std::move(b); // Move assign 708 709 EXPECT_THAT(c, ElementsAre("foo")); 710 } 711 712 TEST(Split, StringDelimiter) { 713 { 714 std::vector<absl::string_view> v = absl::StrSplit("a,b", ','); 715 EXPECT_THAT(v, ElementsAre("a", "b")); 716 } 717 718 { 719 std::vector<absl::string_view> v = absl::StrSplit("a,b", std::string(",")); 720 EXPECT_THAT(v, ElementsAre("a", "b")); 721 } 722 723 { 724 std::vector<absl::string_view> v = 725 absl::StrSplit("a,b", absl::string_view(",")); 726 EXPECT_THAT(v, ElementsAre("a", "b")); 727 } 728 } 729 730 #if !defined(__cpp_char8_t) 731 #if defined(__clang__) 732 #pragma clang diagnostic push 733 #pragma clang diagnostic ignored "-Wc++2a-compat" 734 #endif 735 TEST(Split, UTF8) { 736 // Tests splitting utf8 strings and utf8 delimiters. 737 std::string utf8_string = u8"\u03BA\u1F79\u03C3\u03BC\u03B5"; 738 { 739 // A utf8 input string with an ascii delimiter. 740 std::string to_split = "a," + utf8_string; 741 std::vector<absl::string_view> v = absl::StrSplit(to_split, ','); 742 EXPECT_THAT(v, ElementsAre("a", utf8_string)); 743 } 744 745 { 746 // A utf8 input string and a utf8 delimiter. 747 std::string to_split = "a," + utf8_string + ",b"; 748 std::string unicode_delimiter = "," + utf8_string + ","; 749 std::vector<absl::string_view> v = 750 absl::StrSplit(to_split, unicode_delimiter); 751 EXPECT_THAT(v, ElementsAre("a", "b")); 752 } 753 754 { 755 // A utf8 input string and ByAnyChar with ascii chars. 756 std::vector<absl::string_view> v = 757 absl::StrSplit(u8"Foo h\u00E4llo th\u4E1Ere", absl::ByAnyChar(" \t")); 758 EXPECT_THAT(v, ElementsAre("Foo", u8"h\u00E4llo", u8"th\u4E1Ere")); 759 } 760 } 761 #if defined(__clang__) 762 #pragma clang diagnostic pop 763 #endif 764 #endif // !defined(__cpp_char8_t) 765 766 TEST(Split, EmptyStringDelimiter) { 767 { 768 std::vector<std::string> v = absl::StrSplit("", ""); 769 EXPECT_THAT(v, ElementsAre("")); 770 } 771 772 { 773 std::vector<std::string> v = absl::StrSplit("a", ""); 774 EXPECT_THAT(v, ElementsAre("a")); 775 } 776 777 { 778 std::vector<std::string> v = absl::StrSplit("ab", ""); 779 EXPECT_THAT(v, ElementsAre("a", "b")); 780 } 781 782 { 783 std::vector<std::string> v = absl::StrSplit("a b", ""); 784 EXPECT_THAT(v, ElementsAre("a", " ", "b")); 785 } 786 } 787 788 TEST(Split, SubstrDelimiter) { 789 std::vector<absl::string_view> results; 790 absl::string_view delim("//"); 791 792 results = absl::StrSplit("", delim); 793 EXPECT_THAT(results, ElementsAre("")); 794 795 results = absl::StrSplit("//", delim); 796 EXPECT_THAT(results, ElementsAre("", "")); 797 798 results = absl::StrSplit("ab", delim); 799 EXPECT_THAT(results, ElementsAre("ab")); 800 801 results = absl::StrSplit("ab//", delim); 802 EXPECT_THAT(results, ElementsAre("ab", "")); 803 804 results = absl::StrSplit("ab/", delim); 805 EXPECT_THAT(results, ElementsAre("ab/")); 806 807 results = absl::StrSplit("a/b", delim); 808 EXPECT_THAT(results, ElementsAre("a/b")); 809 810 results = absl::StrSplit("a//b", delim); 811 EXPECT_THAT(results, ElementsAre("a", "b")); 812 813 results = absl::StrSplit("a///b", delim); 814 EXPECT_THAT(results, ElementsAre("a", "/b")); 815 816 results = absl::StrSplit("a////b", delim); 817 EXPECT_THAT(results, ElementsAre("a", "", "b")); 818 } 819 820 TEST(Split, EmptyResults) { 821 std::vector<absl::string_view> results; 822 823 results = absl::StrSplit("", '#'); 824 EXPECT_THAT(results, ElementsAre("")); 825 826 results = absl::StrSplit("#", '#'); 827 EXPECT_THAT(results, ElementsAre("", "")); 828 829 results = absl::StrSplit("#cd", '#'); 830 EXPECT_THAT(results, ElementsAre("", "cd")); 831 832 results = absl::StrSplit("ab#cd#", '#'); 833 EXPECT_THAT(results, ElementsAre("ab", "cd", "")); 834 835 results = absl::StrSplit("ab##cd", '#'); 836 EXPECT_THAT(results, ElementsAre("ab", "", "cd")); 837 838 results = absl::StrSplit("ab##", '#'); 839 EXPECT_THAT(results, ElementsAre("ab", "", "")); 840 841 results = absl::StrSplit("ab#ab#", '#'); 842 EXPECT_THAT(results, ElementsAre("ab", "ab", "")); 843 844 results = absl::StrSplit("aaaa", 'a'); 845 EXPECT_THAT(results, ElementsAre("", "", "", "", "")); 846 847 results = absl::StrSplit("", '#', absl::SkipEmpty()); 848 EXPECT_THAT(results, ElementsAre()); 849 } 850 851 template <typename Delimiter> 852 static bool IsFoundAtStartingPos(absl::string_view text, Delimiter d, 853 size_t starting_pos, int expected_pos) { 854 absl::string_view found = d.Find(text, starting_pos); 855 return found.data() != text.data() + text.size() && 856 expected_pos == found.data() - text.data(); 857 } 858 859 // Helper function for testing Delimiter objects. Returns true if the given 860 // Delimiter is found in the given string at the given position. This function 861 // tests two cases: 862 // 1. The actual text given, staring at position 0 863 // 2. The text given with leading padding that should be ignored 864 template <typename Delimiter> 865 static bool IsFoundAt(absl::string_view text, Delimiter d, int expected_pos) { 866 const std::string leading_text = ",x,y,z,"; 867 return IsFoundAtStartingPos(text, d, 0, expected_pos) && 868 IsFoundAtStartingPos(leading_text + std::string(text), d, 869 leading_text.length(), 870 expected_pos + leading_text.length()); 871 } 872 873 // 874 // Tests for ByString 875 // 876 877 // Tests using any delimiter that represents a single comma. 878 template <typename Delimiter> 879 void TestComma(Delimiter d) { 880 EXPECT_TRUE(IsFoundAt(",", d, 0)); 881 EXPECT_TRUE(IsFoundAt("a,", d, 1)); 882 EXPECT_TRUE(IsFoundAt(",b", d, 0)); 883 EXPECT_TRUE(IsFoundAt("a,b", d, 1)); 884 EXPECT_TRUE(IsFoundAt("a,b,", d, 1)); 885 EXPECT_TRUE(IsFoundAt("a,b,c", d, 1)); 886 EXPECT_FALSE(IsFoundAt("", d, -1)); 887 EXPECT_FALSE(IsFoundAt(" ", d, -1)); 888 EXPECT_FALSE(IsFoundAt("a", d, -1)); 889 EXPECT_FALSE(IsFoundAt("a b c", d, -1)); 890 EXPECT_FALSE(IsFoundAt("a;b;c", d, -1)); 891 EXPECT_FALSE(IsFoundAt(";", d, -1)); 892 } 893 894 TEST(Delimiter, ByString) { 895 using absl::ByString; 896 TestComma(ByString(",")); 897 898 // Works as named variable. 899 ByString comma_string(","); 900 TestComma(comma_string); 901 902 // The first occurrence of empty string ("") in a string is at position 0. 903 // There is a test below that demonstrates this for absl::string_view::find(). 904 // If the ByString delimiter returned position 0 for this, there would 905 // be an infinite loop in the SplitIterator code. To avoid this, empty string 906 // is a special case in that it always returns the item at position 1. 907 absl::string_view abc("abc"); 908 EXPECT_EQ(0, abc.find("")); // "" is found at position 0 909 ByString empty(""); 910 EXPECT_FALSE(IsFoundAt("", empty, 0)); 911 EXPECT_FALSE(IsFoundAt("a", empty, 0)); 912 EXPECT_TRUE(IsFoundAt("ab", empty, 1)); 913 EXPECT_TRUE(IsFoundAt("abc", empty, 1)); 914 } 915 916 TEST(Split, ByChar) { 917 using absl::ByChar; 918 TestComma(ByChar(',')); 919 920 // Works as named variable. 921 ByChar comma_char(','); 922 TestComma(comma_char); 923 } 924 925 // 926 // Tests for ByAnyChar 927 // 928 929 TEST(Delimiter, ByAnyChar) { 930 using absl::ByAnyChar; 931 ByAnyChar one_delim(","); 932 // Found 933 EXPECT_TRUE(IsFoundAt(",", one_delim, 0)); 934 EXPECT_TRUE(IsFoundAt("a,", one_delim, 1)); 935 EXPECT_TRUE(IsFoundAt("a,b", one_delim, 1)); 936 EXPECT_TRUE(IsFoundAt(",b", one_delim, 0)); 937 // Not found 938 EXPECT_FALSE(IsFoundAt("", one_delim, -1)); 939 EXPECT_FALSE(IsFoundAt(" ", one_delim, -1)); 940 EXPECT_FALSE(IsFoundAt("a", one_delim, -1)); 941 EXPECT_FALSE(IsFoundAt("a;b;c", one_delim, -1)); 942 EXPECT_FALSE(IsFoundAt(";", one_delim, -1)); 943 944 ByAnyChar two_delims(",;"); 945 // Found 946 EXPECT_TRUE(IsFoundAt(",", two_delims, 0)); 947 EXPECT_TRUE(IsFoundAt(";", two_delims, 0)); 948 EXPECT_TRUE(IsFoundAt(",;", two_delims, 0)); 949 EXPECT_TRUE(IsFoundAt(";,", two_delims, 0)); 950 EXPECT_TRUE(IsFoundAt(",;b", two_delims, 0)); 951 EXPECT_TRUE(IsFoundAt(";,b", two_delims, 0)); 952 EXPECT_TRUE(IsFoundAt("a;,", two_delims, 1)); 953 EXPECT_TRUE(IsFoundAt("a,;", two_delims, 1)); 954 EXPECT_TRUE(IsFoundAt("a;,b", two_delims, 1)); 955 EXPECT_TRUE(IsFoundAt("a,;b", two_delims, 1)); 956 // Not found 957 EXPECT_FALSE(IsFoundAt("", two_delims, -1)); 958 EXPECT_FALSE(IsFoundAt(" ", two_delims, -1)); 959 EXPECT_FALSE(IsFoundAt("a", two_delims, -1)); 960 EXPECT_FALSE(IsFoundAt("a=b=c", two_delims, -1)); 961 EXPECT_FALSE(IsFoundAt("=", two_delims, -1)); 962 963 // ByAnyChar behaves just like ByString when given a delimiter of empty 964 // string. That is, it always returns a zero-length absl::string_view 965 // referring to the item at position 1, not position 0. 966 ByAnyChar empty(""); 967 EXPECT_FALSE(IsFoundAt("", empty, 0)); 968 EXPECT_FALSE(IsFoundAt("a", empty, 0)); 969 EXPECT_TRUE(IsFoundAt("ab", empty, 1)); 970 EXPECT_TRUE(IsFoundAt("abc", empty, 1)); 971 } 972 973 // 974 // Tests for ByAsciiWhitespace 975 // 976 TEST(Split, ByAsciiWhitespace) { 977 using absl::ByAsciiWhitespace; 978 using absl::SkipEmpty; 979 std::vector<absl::string_view> results; 980 981 results = absl::StrSplit("aaaa\n", ByAsciiWhitespace()); 982 EXPECT_THAT(results, ElementsAre("aaaa", "")); 983 984 results = absl::StrSplit("aaaa\n", ByAsciiWhitespace(), SkipEmpty()); 985 EXPECT_THAT(results, ElementsAre("aaaa")); 986 987 results = absl::StrSplit(" ", ByAsciiWhitespace()); 988 EXPECT_THAT(results, ElementsAre("", "")); 989 990 results = absl::StrSplit(" ", ByAsciiWhitespace(), SkipEmpty()); 991 EXPECT_THAT(results, IsEmpty()); 992 993 results = absl::StrSplit("a", ByAsciiWhitespace()); 994 EXPECT_THAT(results, ElementsAre("a")); 995 996 results = absl::StrSplit("", ByAsciiWhitespace()); 997 EXPECT_THAT(results, ElementsAre("")); 998 999 results = absl::StrSplit("", ByAsciiWhitespace(), SkipEmpty()); 1000 EXPECT_THAT(results, IsEmpty()); 1001 1002 results = absl::StrSplit("a b\tc\n d\n", ByAsciiWhitespace()); 1003 EXPECT_THAT(results, ElementsAre("a", "b", "c", "", "", "d", "")); 1004 1005 results = absl::StrSplit("a b\tc\n d \n", ByAsciiWhitespace(), SkipEmpty()); 1006 EXPECT_THAT(results, ElementsAre("a", "b", "c", "d")); 1007 1008 results = absl::StrSplit("a\t\n\v\f\r b", ByAsciiWhitespace(), SkipEmpty()); 1009 EXPECT_THAT(results, ElementsAre("a", "b")); 1010 } 1011 1012 // 1013 // Tests for ByLength 1014 // 1015 1016 TEST(Delimiter, ByLength) { 1017 using absl::ByLength; 1018 1019 ByLength four_char_delim(4); 1020 1021 // Found 1022 EXPECT_TRUE(IsFoundAt("abcde", four_char_delim, 4)); 1023 EXPECT_TRUE(IsFoundAt("abcdefghijklmnopqrstuvwxyz", four_char_delim, 4)); 1024 EXPECT_TRUE(IsFoundAt("a b,c\nd", four_char_delim, 4)); 1025 // Not found 1026 EXPECT_FALSE(IsFoundAt("", four_char_delim, 0)); 1027 EXPECT_FALSE(IsFoundAt("a", four_char_delim, 0)); 1028 EXPECT_FALSE(IsFoundAt("ab", four_char_delim, 0)); 1029 EXPECT_FALSE(IsFoundAt("abc", four_char_delim, 0)); 1030 EXPECT_FALSE(IsFoundAt("abcd", four_char_delim, 0)); 1031 } 1032 1033 TEST(Split, WorksWithLargeStrings) { 1034 #if defined(ABSL_HAVE_ADDRESS_SANITIZER) || \ 1035 defined(ABSL_HAVE_MEMORY_SANITIZER) || defined(ABSL_HAVE_THREAD_SANITIZER) 1036 constexpr size_t kSize = (uint32_t{1} << 26) + 1; // 64M + 1 byte 1037 #else 1038 constexpr size_t kSize = (uint32_t{1} << 31) + 1; // 2G + 1 byte 1039 #endif 1040 if (sizeof(size_t) > 4) { 1041 std::string s(kSize, 'x'); 1042 s.back() = '-'; 1043 std::vector<absl::string_view> v = absl::StrSplit(s, '-'); 1044 EXPECT_EQ(2, v.size()); 1045 // The first element will contain 2G of 'x's. 1046 // testing::StartsWith is too slow with a 2G string. 1047 EXPECT_EQ('x', v[0][0]); 1048 EXPECT_EQ('x', v[0][1]); 1049 EXPECT_EQ('x', v[0][3]); 1050 EXPECT_EQ("", v[1]); 1051 } 1052 } 1053 1054 TEST(SplitInternalTest, TypeTraits) { 1055 EXPECT_FALSE(absl::strings_internal::HasMappedType<int>::value); 1056 EXPECT_TRUE( 1057 (absl::strings_internal::HasMappedType<std::map<int, int>>::value)); 1058 EXPECT_FALSE(absl::strings_internal::HasValueType<int>::value); 1059 EXPECT_TRUE( 1060 (absl::strings_internal::HasValueType<std::map<int, int>>::value)); 1061 EXPECT_FALSE(absl::strings_internal::HasConstIterator<int>::value); 1062 EXPECT_TRUE( 1063 (absl::strings_internal::HasConstIterator<std::map<int, int>>::value)); 1064 EXPECT_FALSE(absl::strings_internal::IsInitializerList<int>::value); 1065 EXPECT_TRUE((absl::strings_internal::IsInitializerList< 1066 std::initializer_list<int>>::value)); 1067 } 1068 1069 } // namespace