str_split.cc (4814B)
1 // Copyright 2017 The Abseil Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "absl/strings/str_split.h" 16 17 #include <algorithm> 18 #include <cstddef> 19 #include <cstdlib> 20 #include <cstring> 21 22 #include "absl/base/config.h" 23 #include "absl/base/internal/raw_logging.h" 24 #include "absl/strings/string_view.h" 25 26 namespace absl { 27 ABSL_NAMESPACE_BEGIN 28 29 namespace { 30 31 // This GenericFind() template function encapsulates the finding algorithm 32 // shared between the ByString and ByAnyChar delimiters. The FindPolicy 33 // template parameter allows each delimiter to customize the actual find 34 // function to use and the length of the found delimiter. For example, the 35 // Literal delimiter will ultimately use absl::string_view::find(), and the 36 // AnyOf delimiter will use absl::string_view::find_first_of(). 37 template <typename FindPolicy> 38 absl::string_view GenericFind(absl::string_view text, 39 absl::string_view delimiter, size_t pos, 40 FindPolicy find_policy) { 41 if (delimiter.empty() && text.length() > 0) { 42 // Special case for empty string delimiters: always return a zero-length 43 // absl::string_view referring to the item at position 1 past pos. 44 return absl::string_view(text.data() + pos + 1, 0); 45 } 46 size_t found_pos = absl::string_view::npos; 47 absl::string_view found(text.data() + text.size(), 48 0); // By default, not found 49 found_pos = find_policy.Find(text, delimiter, pos); 50 if (found_pos != absl::string_view::npos) { 51 found = absl::string_view(text.data() + found_pos, 52 find_policy.Length(delimiter)); 53 } 54 return found; 55 } 56 57 // Finds using absl::string_view::find(), therefore the length of the found 58 // delimiter is delimiter.length(). 59 struct LiteralPolicy { 60 static size_t Find(absl::string_view text, absl::string_view delimiter, 61 size_t pos) { 62 return text.find(delimiter, pos); 63 } 64 static size_t Length(absl::string_view delimiter) { 65 return delimiter.length(); 66 } 67 }; 68 69 // Finds using absl::string_view::find_first_of(), therefore the length of the 70 // found delimiter is 1. 71 struct AnyOfPolicy { 72 static size_t Find(absl::string_view text, absl::string_view delimiter, 73 size_t pos) { 74 return text.find_first_of(delimiter, pos); 75 } 76 static size_t Length(absl::string_view /* delimiter */) { return 1; } 77 }; 78 79 } // namespace 80 81 // 82 // ByString 83 // 84 85 ByString::ByString(absl::string_view sp) : delimiter_(sp) {} 86 87 absl::string_view ByString::Find(absl::string_view text, size_t pos) const { 88 if (delimiter_.length() == 1) { 89 // Much faster to call find on a single character than on an 90 // absl::string_view. 91 size_t found_pos = text.find(delimiter_[0], pos); 92 if (found_pos == absl::string_view::npos) 93 return absl::string_view(text.data() + text.size(), 0); 94 return text.substr(found_pos, 1); 95 } 96 return GenericFind(text, delimiter_, pos, LiteralPolicy()); 97 } 98 99 absl::string_view ByAsciiWhitespace::Find(absl::string_view text, 100 size_t pos) const { 101 return GenericFind(text, " \t\v\f\r\n", pos, AnyOfPolicy()); 102 } 103 104 // 105 // ByChar 106 // 107 108 absl::string_view ByChar::Find(absl::string_view text, size_t pos) const { 109 size_t found_pos = text.find(c_, pos); 110 if (found_pos == absl::string_view::npos) 111 return absl::string_view(text.data() + text.size(), 0); 112 return text.substr(found_pos, 1); 113 } 114 115 // 116 // ByAnyChar 117 // 118 119 ByAnyChar::ByAnyChar(absl::string_view sp) : delimiters_(sp) {} 120 121 absl::string_view ByAnyChar::Find(absl::string_view text, size_t pos) const { 122 return GenericFind(text, delimiters_, pos, AnyOfPolicy()); 123 } 124 125 // 126 // ByLength 127 // 128 ByLength::ByLength(ptrdiff_t length) : length_(length) { 129 ABSL_RAW_CHECK(length > 0, ""); 130 } 131 132 absl::string_view ByLength::Find(absl::string_view text, size_t pos) const { 133 pos = std::min(pos, text.size()); // truncate `pos` 134 absl::string_view substr = text.substr(pos); 135 // If the string is shorter than the chunk size we say we 136 // "can't find the delimiter" so this will be the last chunk. 137 if (substr.length() <= static_cast<size_t>(length_)) 138 return absl::string_view(text.data() + text.size(), 0); 139 140 return absl::string_view(substr.data() + length_, 0); 141 } 142 143 ABSL_NAMESPACE_END 144 } // namespace absl