stringpiece.h (10532B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 // Copyright (C) 2009-2013, International Business Machines 4 // Corporation and others. All Rights Reserved. 5 // 6 // Copyright 2001 and onwards Google Inc. 7 // Author: Sanjay Ghemawat 8 9 // This code is a contribution of Google code, and the style used here is 10 // a compromise between the original Google code and the ICU coding guidelines. 11 // For example, data types are ICU-ified (size_t,int->int32_t), 12 // and API comments doxygen-ified, but function names and behavior are 13 // as in the original, if possible. 14 // Assertion-style error handling, not available in ICU, was changed to 15 // parameter "pinning" similar to UnicodeString. 16 // 17 // In addition, this is only a partial port of the original Google code, 18 // limited to what was needed so far. The (nearly) complete original code 19 // is in the ICU svn repository at icuhtml/trunk/design/strings/contrib 20 // (see ICU ticket 6765, r25517). 21 22 #ifndef __STRINGPIECE_H__ 23 #define __STRINGPIECE_H__ 24 25 /** 26 * \file 27 * \brief C++ API: StringPiece: Read-only byte string wrapper class. 28 */ 29 30 #include "unicode/utypes.h" 31 32 #if U_SHOW_CPLUSPLUS_API 33 34 #include <cstddef> 35 #include <string_view> 36 #include <type_traits> 37 38 #include "unicode/uobject.h" 39 #include "unicode/std_string.h" 40 41 // Arghh! I wish C++ literals were "string". 42 43 U_NAMESPACE_BEGIN 44 45 /** 46 * A string-like object that points to a sized piece of memory. 47 * 48 * We provide non-explicit singleton constructors so users can pass 49 * in a "const char*" or a "string" wherever a "StringPiece" is 50 * expected. 51 * 52 * Functions or methods may use StringPiece parameters to accept either a 53 * "const char*" or a "string" value that will be implicitly converted to a 54 * StringPiece. 55 * 56 * Systematic usage of StringPiece is encouraged as it will reduce unnecessary 57 * conversions from "const char*" to "string" and back again. 58 * 59 * @stable ICU 4.2 60 */ 61 class U_COMMON_API StringPiece : public UMemory { 62 private: 63 const char* ptr_; 64 int32_t length_; 65 66 public: 67 /** 68 * Default constructor, creates an empty StringPiece. 69 * @stable ICU 4.2 70 */ 71 StringPiece() : ptr_(nullptr), length_(0) { } 72 73 /** 74 * Constructs from a NUL-terminated const char * pointer. 75 * @param str a NUL-terminated const char * pointer 76 * @stable ICU 4.2 77 */ 78 StringPiece(const char* str); 79 #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) 80 /** 81 * Constructs from a NUL-terminated const char8_t * pointer. 82 * @param str a NUL-terminated const char8_t * pointer 83 * @stable ICU 67 84 */ 85 StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {} 86 #endif 87 /** 88 * Constructs an empty StringPiece. 89 * Needed for type disambiguation from multiple other overloads. 90 * @param p nullptr 91 * @stable ICU 67 92 */ 93 StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {} 94 95 /** 96 * Constructs from a std::string. 97 * @stable ICU 4.2 98 */ 99 StringPiece(const std::string& str) 100 : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { } 101 #if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN) 102 /** 103 * Constructs from a std::u8string. 104 * @stable ICU 67 105 */ 106 StringPiece(const std::u8string& str) 107 : ptr_(reinterpret_cast<const char*>(str.data())), 108 length_(static_cast<int32_t>(str.size())) { } 109 #endif 110 111 /** 112 * Constructs from some other implementation of a string piece class, from any 113 * C++ record type that has these two methods: 114 * 115 * \code{.cpp} 116 * 117 * struct OtherStringPieceClass { 118 * const char* data(); // or const char8_t* 119 * size_t size(); 120 * }; 121 * 122 * \endcode 123 * 124 * The other string piece class will typically be std::string_view from C++17 125 * or absl::string_view from Abseil. 126 * 127 * Starting with C++20, data() may also return a const char8_t* pointer, 128 * as from std::u8string_view. 129 * 130 * @param str the other string piece 131 * @stable ICU 65 132 */ 133 template <typename T, 134 typename = std::enable_if_t< 135 (std::is_same_v<decltype(T().data()), const char*> 136 #if defined(__cpp_char8_t) 137 || std::is_same_v<decltype(T().data()), const char8_t*> 138 #endif 139 ) && 140 std::is_same_v<decltype(T().size()), size_t>>> 141 StringPiece(T str) 142 : ptr_(reinterpret_cast<const char*>(str.data())), 143 length_(static_cast<int32_t>(str.size())) {} 144 145 /** 146 * Constructs from a const char * pointer and a specified length. 147 * @param offset a const char * pointer (need not be terminated) 148 * @param len the length of the string; must be non-negative 149 * @stable ICU 4.2 150 */ 151 StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { } 152 #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) 153 /** 154 * Constructs from a const char8_t * pointer and a specified length. 155 * @param str a const char8_t * pointer (need not be terminated) 156 * @param len the length of the string; must be non-negative 157 * @stable ICU 67 158 */ 159 StringPiece(const char8_t* str, int32_t len) : 160 StringPiece(reinterpret_cast<const char*>(str), len) {} 161 #endif 162 163 /** 164 * Substring of another StringPiece. 165 * @param x the other StringPiece 166 * @param pos start position in x; must be non-negative and <= x.length(). 167 * @stable ICU 4.2 168 */ 169 StringPiece(const StringPiece& x, int32_t pos); 170 /** 171 * Substring of another StringPiece. 172 * @param x the other StringPiece 173 * @param pos start position in x; must be non-negative and <= x.length(). 174 * @param len length of the substring; 175 * must be non-negative and will be pinned to at most x.length() - pos. 176 * @stable ICU 4.2 177 */ 178 StringPiece(const StringPiece& x, int32_t pos, int32_t len); 179 180 #ifndef U_HIDE_INTERNAL_API 181 /** 182 * Converts to a std::string_view(). 183 * @internal 184 */ 185 inline operator std::string_view() const { 186 return {data(), static_cast<std::string_view::size_type>(size())}; 187 } 188 #endif // U_HIDE_INTERNAL_API 189 190 /** 191 * Returns the string pointer. May be nullptr if it is empty. 192 * 193 * data() may return a pointer to a buffer with embedded NULs, and the 194 * returned buffer may or may not be null terminated. Therefore it is 195 * typically a mistake to pass data() to a routine that expects a NUL 196 * terminated string. 197 * @return the string pointer 198 * @stable ICU 4.2 199 */ 200 const char* data() const { return ptr_; } 201 /** 202 * Returns the string length. Same as length(). 203 * @return the string length 204 * @stable ICU 4.2 205 */ 206 int32_t size() const { return length_; } 207 /** 208 * Returns the string length. Same as size(). 209 * @return the string length 210 * @stable ICU 4.2 211 */ 212 int32_t length() const { return length_; } 213 /** 214 * Returns whether the string is empty. 215 * @return true if the string is empty 216 * @stable ICU 4.2 217 */ 218 UBool empty() const { return length_ == 0; } 219 220 /** 221 * Sets to an empty string. 222 * @stable ICU 4.2 223 */ 224 void clear() { ptr_ = nullptr; length_ = 0; } 225 226 /** 227 * Reset the stringpiece to refer to new data. 228 * @param xdata pointer the new string data. Need not be nul terminated. 229 * @param len the length of the new data 230 * @stable ICU 4.8 231 */ 232 void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; } 233 234 /** 235 * Reset the stringpiece to refer to new data. 236 * @param str a pointer to a NUL-terminated string. 237 * @stable ICU 4.8 238 */ 239 void set(const char* str); 240 241 #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) 242 /** 243 * Resets the stringpiece to refer to new data. 244 * @param xdata pointer the new string data. Need not be NUL-terminated. 245 * @param len the length of the new data 246 * @stable ICU 67 247 */ 248 inline void set(const char8_t* xdata, int32_t len) { 249 set(reinterpret_cast<const char*>(xdata), len); 250 } 251 252 /** 253 * Resets the stringpiece to refer to new data. 254 * @param str a pointer to a NUL-terminated string. 255 * @stable ICU 67 256 */ 257 inline void set(const char8_t* str) { 258 set(reinterpret_cast<const char*>(str)); 259 } 260 #endif 261 262 /** 263 * Removes the first n string units. 264 * @param n prefix length, must be non-negative and <=length() 265 * @stable ICU 4.2 266 */ 267 void remove_prefix(int32_t n) { 268 if (n >= 0) { 269 if (n > length_) { 270 n = length_; 271 } 272 ptr_ += n; 273 length_ -= n; 274 } 275 } 276 277 /** 278 * Removes the last n string units. 279 * @param n suffix length, must be non-negative and <=length() 280 * @stable ICU 4.2 281 */ 282 void remove_suffix(int32_t n) { 283 if (n >= 0) { 284 if (n <= length_) { 285 length_ -= n; 286 } else { 287 length_ = 0; 288 } 289 } 290 } 291 292 /** 293 * Searches the StringPiece for the given search string (needle); 294 * @param needle The string for which to search. 295 * @param offset Where to start searching within this string (haystack). 296 * @return The offset of needle in haystack, or -1 if not found. 297 * @stable ICU 67 298 */ 299 int32_t find(StringPiece needle, int32_t offset); 300 301 /** 302 * Compares this StringPiece with the other StringPiece, with semantics 303 * similar to std::string::compare(). 304 * @param other The string to compare to. 305 * @return below zero if this < other; above zero if this > other; 0 if this == other. 306 * @stable ICU 67 307 */ 308 int32_t compare(StringPiece other); 309 310 /** 311 * Maximum integer, used as a default value for substring methods. 312 * @stable ICU 4.2 313 */ 314 static const int32_t npos; // = 0x7fffffff; 315 316 /** 317 * Returns a substring of this StringPiece. 318 * @param pos start position; must be non-negative and <= length(). 319 * @param len length of the substring; 320 * must be non-negative and will be pinned to at most length() - pos. 321 * @return the substring StringPiece 322 * @stable ICU 4.2 323 */ 324 StringPiece substr(int32_t pos, int32_t len = npos) const { 325 return StringPiece(*this, pos, len); 326 } 327 }; 328 329 /** 330 * Global operator == for StringPiece 331 * @param x The first StringPiece to compare. 332 * @param y The second StringPiece to compare. 333 * @return true if the string data is equal 334 * @stable ICU 4.8 335 */ 336 U_COMMON_API UBool U_EXPORT2 337 operator==(const StringPiece& x, const StringPiece& y); 338 339 /** 340 * Global operator != for StringPiece 341 * @param x The first StringPiece to compare. 342 * @param y The second StringPiece to compare. 343 * @return true if the string data is not equal 344 * @stable ICU 4.8 345 */ 346 inline bool operator!=(const StringPiece& x, const StringPiece& y) { 347 return !(x == y); 348 } 349 350 U_NAMESPACE_END 351 352 #endif /* U_SHOW_CPLUSPLUS_API */ 353 354 #endif // __STRINGPIECE_H__