unistr.h (188891B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (C) 1998-2016, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 * 9 * File unistr.h 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 09/25/98 stephen Creation. 15 * 11/11/98 stephen Changed per 11/9 code review. 16 * 04/20/99 stephen Overhauled per 4/16 code review. 17 * 11/18/99 aliu Made to inherit from Replaceable. Added method 18 * handleReplaceBetween(); other methods unchanged. 19 * 06/25/01 grhoten Remove dependency on iostream. 20 ****************************************************************************** 21 */ 22 23 #ifndef UNISTR_H 24 #define UNISTR_H 25 26 /** 27 * \file 28 * \brief C++ API: Unicode String 29 */ 30 31 #include "unicode/utypes.h" 32 33 #if U_SHOW_CPLUSPLUS_API 34 35 #include <cstddef> 36 #include <string_view> 37 #include "unicode/char16ptr.h" 38 #include "unicode/rep.h" 39 #include "unicode/std_string.h" 40 #include "unicode/stringpiece.h" 41 #include "unicode/bytestream.h" 42 43 struct UConverter; // unicode/ucnv.h 44 45 #ifndef USTRING_H 46 /** 47 * \ingroup ustring_ustrlen 48 * @param s Pointer to sequence of UChars. 49 * @return Length of sequence. 50 */ 51 U_CAPI int32_t U_EXPORT2 u_strlen(const UChar *s); 52 #endif 53 54 U_NAMESPACE_BEGIN 55 56 #if !UCONFIG_NO_BREAK_ITERATION 57 class BreakIterator; // unicode/brkiter.h 58 #endif 59 class Edits; 60 61 U_NAMESPACE_END 62 63 // Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper. 64 /** 65 * Internal string case mapping function type. 66 * All error checking must be done. 67 * src and dest must not overlap. 
68 * @internal 69 */ 70 typedef int32_t U_CALLCONV 71 UStringCaseMapper(int32_t caseLocale, uint32_t options, 72 #if !UCONFIG_NO_BREAK_ITERATION 73 icu::BreakIterator *iter, 74 #endif 75 char16_t *dest, int32_t destCapacity, 76 const char16_t *src, int32_t srcLength, 77 icu::Edits *edits, 78 UErrorCode &errorCode); 79 80 U_NAMESPACE_BEGIN 81 82 class Locale; // unicode/locid.h 83 class StringCharacterIterator; 84 class UnicodeStringAppendable; // unicode/appendable.h 85 86 /* The <iostream> include has been moved to unicode/ustream.h */ 87 88 /** 89 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor 90 * which constructs a Unicode string from an invariant-character char * string. 91 * About invariant characters see utypes.h. 92 * This constructor has no runtime dependency on conversion code and is 93 * therefore recommended over ones taking a charset name string 94 * (where the empty string "" indicates invariant-character conversion). 95 * 96 * @stable ICU 3.2 97 */ 98 #define US_INV icu::UnicodeString::kInvariant 99 100 /** 101 * \def UNICODE_STRING 102 * Obsolete macro approximating UnicodeString literals. 103 * 104 * Prior to the availability of C++11 and u"UTF-16 string literals", 105 * this macro was provided for portability and efficiency when 106 * initializing UnicodeStrings from literals. 107 * 108 * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time 109 * length determination: 110 * \code 111 * UnicodeString str(u"literal"); 112 * if (str == u"other literal") { ... } 113 * \endcode 114 * 115 * The string parameter must be a C string literal. 116 * The length of the string, not including the terminating 117 * `NUL`, must be specified as a constant. 
118 * @stable ICU 2.0 119 */ 120 #if !U_CHAR16_IS_TYPEDEF 121 # define UNICODE_STRING(cs, _length) icu::UnicodeString(true, u ## cs, _length) 122 #else 123 # define UNICODE_STRING(cs, _length) icu::UnicodeString(true, (const char16_t*)u ## cs, _length) 124 #endif 125 126 /** 127 * Unicode String literals in C++. 128 * Obsolete macro approximating UnicodeString literals. 129 * See UNICODE_STRING. 130 * 131 * The string parameter must be a C string literal. 132 * @stable ICU 2.0 133 * @see UNICODE_STRING 134 */ 135 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1) 136 137 /** 138 * \def UNISTR_FROM_CHAR_EXPLICIT 139 * This can be defined to be empty or "explicit". 140 * If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32) 141 * constructors are marked as explicit, preventing their inadvertent use. 142 * @stable ICU 49 143 */ 144 #ifndef UNISTR_FROM_CHAR_EXPLICIT 145 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 146 // Auto-"explicit" in ICU library code. 147 # define UNISTR_FROM_CHAR_EXPLICIT explicit 148 # else 149 // Empty by default for source code compatibility. 150 # define UNISTR_FROM_CHAR_EXPLICIT 151 # endif 152 #endif 153 154 /** 155 * \def UNISTR_FROM_STRING_EXPLICIT 156 * This can be defined to be empty or "explicit". 157 * If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *) 158 * constructors are marked as explicit, preventing their inadvertent use. 159 * 160 * In particular, this helps prevent accidentally depending on ICU conversion code 161 * by passing a string literal into an API with a const UnicodeString & parameter. 162 * @stable ICU 49 163 */ 164 #ifndef UNISTR_FROM_STRING_EXPLICIT 165 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 166 // Auto-"explicit" in ICU library code. 
167 # define UNISTR_FROM_STRING_EXPLICIT explicit 168 # else 169 // Empty by default for source code compatibility. 170 # define UNISTR_FROM_STRING_EXPLICIT 171 # endif 172 #endif 173 174 /** 175 * \def UNISTR_OBJECT_SIZE 176 * Desired sizeof(UnicodeString) in bytes. 177 * It should be a multiple of sizeof(pointer) to avoid unusable space for padding. 178 * The object size may want to be a multiple of 16 bytes, 179 * which is a common granularity for heap allocation. 180 * 181 * Any space inside the object beyond sizeof(vtable pointer) + 2 182 * is available for storing short strings inside the object. 183 * The bigger the object, the longer a string that can be stored inside the object, 184 * without additional heap allocation. 185 * 186 * Depending on a platform's pointer size, pointer alignment requirements, 187 * and struct padding, the compiler will usually round up sizeof(UnicodeString) 188 * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models), 189 * to hold the fields for heap-allocated strings. 190 * Such a minimum size also ensures that the object is easily large enough 191 * to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH). 192 * 193 * sizeof(UnicodeString) >= 48 should work for all known platforms. 194 * 195 * For example, on a 64-bit machine where sizeof(vtable pointer) is 8, 196 * sizeof(UnicodeString) = 64 would leave space for 197 * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27 198 * char16_ts stored inside the object. 199 * 200 * The minimum object size on a 64-bit machine would be 201 * 4 * sizeof(pointer) = 4 * 8 = 32 bytes, 202 * and the internal buffer would hold up to 11 char16_ts in that case. 
203 * 204 * @see U16_MAX_LENGTH 205 * @stable ICU 56 206 */ 207 #ifndef UNISTR_OBJECT_SIZE 208 # define UNISTR_OBJECT_SIZE 64 209 #endif 210 211 /** 212 * UnicodeString is a string class that stores Unicode characters directly and provides 213 * similar functionality as the Java String and StringBuffer/StringBuilder classes. 214 * It is a concrete implementation of the abstract class Replaceable (for transliteration). 215 * 216 * The UnicodeString equivalent of std::string’s clear() is remove(). 217 * 218 * Starting with ICU 78, a UnicodeString is a C++ "range" of char16_t code units. 219 * utfStringCodePoints() and unsafeUTFStringCodePoints() can be used to iterate over 220 * the code points. 221 * 222 * A UnicodeString may "alias" an external array of characters 223 * (that is, point to it, rather than own the array) 224 * whose lifetime must then at least match the lifetime of the aliasing object. 225 * This aliasing may be preserved when returning a UnicodeString by value, 226 * depending on the compiler and the function implementation, 227 * via Return Value Optimization (RVO) or the move assignment operator. 228 * (However, the copy assignment operator does not preserve aliasing.) 229 * For details see the description of storage models at the end of the class API docs 230 * and in the User Guide chapter linked from there. 231 * 232 * The UnicodeString class is not suitable for subclassing. 233 * 234 * For an overview of Unicode strings in C and C++ see the 235 * [User Guide Strings chapter](https://unicode-org.github.io/icu/userguide/strings#strings-in-cc). 236 * 237 * In ICU, a Unicode string consists of 16-bit Unicode *code units*. 238 * A Unicode character may be stored with either one code unit 239 * (the most common case) or with a matched pair of special code units 240 * ("surrogates"). The data type for code units is char16_t. 241 * For single-character handling, a Unicode character code *point* is a value 242 * in the range 0..0x10ffff. 
ICU uses the UChar32 type for code points. 243 * 244 * Indexes and offsets into and lengths of strings always count code units, not code points. 245 * This is the same as with multi-byte char* strings in traditional string handling. 246 * Operations on partial strings typically do not test for code point boundaries. 247 * If necessary, the user needs to take care of such boundaries by testing for the code unit 248 * values or by using functions like 249 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit() 250 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h). 251 * 252 * UnicodeString methods are more lenient with regard to input parameter values 253 * than other ICU APIs. In particular: 254 * - If indexes are out of bounds for a UnicodeString object 255 * (< 0 or > length()) then they are "pinned" to the nearest boundary. 256 * - If the buffer passed to an insert/append/replace operation is owned by the 257 * target object, e.g., calling str.append(str), an extra copy may take place 258 * to ensure safety. 259 * - If primitive string pointer values (e.g., const char16_t * or char *) 260 * for input strings are nullptr, then those input string parameters are treated 261 * as if they pointed to an empty string. 262 * However, this is *not* the case for char * parameters for charset names 263 * or other IDs. 264 * - Most UnicodeString methods do not take a UErrorCode parameter because 265 * there are usually very few opportunities for failure other than a shortage 266 * of memory, error codes in low-level C++ string methods would be inconvenient, 267 * and the error code as the last parameter (ICU convention) would prevent 268 * the use of default parameter values. 269 * Instead, such methods set the UnicodeString into a "bogus" state 270 * (see isBogus()) if an error occurs. 
271 * 272 * In string comparisons, two UnicodeString objects that are both "bogus" 273 * compare equal (to be transitive and prevent endless loops in sorting), 274 * and a "bogus" string compares less than any non-"bogus" one. 275 * 276 * Const UnicodeString methods are thread-safe. Multiple threads can use 277 * const methods on the same UnicodeString object simultaneously, 278 * but non-const methods must not be called concurrently (in multiple threads) 279 * with any other (const or non-const) methods. 280 * 281 * Similarly, const UnicodeString & parameters are thread-safe. 282 * One object may be passed in as such a parameter concurrently in multiple threads. 283 * This includes the const UnicodeString & parameters for 284 * copy construction, assignment, and cloning. 285 * 286 * UnicodeString uses several storage methods. 287 * String contents can be stored inside the UnicodeString object itself, 288 * in an allocated and shared buffer, or in an outside buffer that is "aliased". 289 * Most of this is done transparently, but careful aliasing in particular provides 290 * significant performance improvements. 291 * Also, the internal buffer is accessible via special functions. 292 * For details see the 293 * [User Guide Strings chapter](https://unicode-org.github.io/icu/userguide/strings#maximizing-performance-with-the-unicodestring-storage-model). 294 * 295 * @see utf.h 296 * @see utfiterator.h 297 * @see utfStringCodePoints 298 * @see unsafeUTFStringCodePoints 299 * @see CharacterIterator 300 * @stable ICU 2.0 301 */ 302 class U_COMMON_API UnicodeString : public Replaceable 303 { 304 public: 305 /** C++ boilerplate @internal */ 306 using value_type = char16_t; 307 308 /** 309 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor 310 * which constructs a Unicode string from an invariant-character char * string. 311 * Use the macro US_INV instead of the full qualification for this value. 
312 * 313 * @see US_INV 314 * @stable ICU 3.2 315 */ 316 enum EInvariant { 317 /** 318 * @see EInvariant 319 * @stable ICU 3.2 320 */ 321 kInvariant 322 }; 323 324 //======================================== 325 // Read-only operations 326 //======================================== 327 328 /* Comparison - bitwise only - for international comparison use collation */ 329 330 /** 331 * Equality operator. Performs only bitwise comparison. 332 * @param text The UnicodeString to compare to this one. 333 * @return true if `text` contains the same characters as this one, 334 * false otherwise. 335 * @stable ICU 2.0 336 */ 337 inline bool operator== (const UnicodeString& text) const; 338 339 /** 340 * Equality operator. Performs only bitwise comparison with `text` 341 * which is, or which is implicitly convertible to, 342 * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view. 343 * 344 * For performance, you can use UTF-16 string literals with compile-time 345 * length determination: 346 * \code 347 * UnicodeString str = ...; 348 * if (str == u"literal") { ... } 349 * \endcode 350 * @param text The string view to compare to this string. 351 * @return true if `text` contains the same characters as this one, false otherwise. Always false if this string is bogus (see isBogus()). 352 * @stable ICU 76 353 */ 354 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>> 355 inline bool operator==(const S &text) const { 356 std::u16string_view sv(internal::toU16StringView(text)); 357 uint32_t len; // unsigned to avoid a compiler warning when compared with sv.length() 358 return !isBogus() && (len = length()) == sv.length() && doEquals(sv.data(), len); 359 } 360 361 /** 362 * Inequality operator. Performs only bitwise comparison. 363 * @param text The UnicodeString to compare to this one. 364 * @return false if `text` contains the same characters as this one, 365 * true otherwise. 366 * @stable ICU 2.0 367 */ 368 inline bool operator!= (const UnicodeString& text) const; 369 370 /** 371 * Inequality operator. 
Performs only bitwise comparison with `text` 372 * which is, or which is implicitly convertible to, 373 * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view. 374 * 375 * For performance, you can use std::u16string_view literals with compile-time 376 * length determination: 377 * \code 378 * #include <string_view> 379 * using namespace std::string_view_literals; 380 * UnicodeString str = ...; 381 * if (str != u"literal"sv) { ... } 382 * \endcode 383 * @param text The string view to compare to this string. 384 * @return false if `text` contains the same characters as this one, true otherwise. Implemented as the negation of operator==(text). 385 * @stable ICU 76 386 */ 387 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>> 388 inline bool operator!=(const S &text) const { 389 return !operator==(text); 390 } 391 392 /** 393 * Greater than operator. Performs only bitwise comparison. 394 * @param text The UnicodeString to compare to this one. 395 * @return true if the characters in this are bitwise 396 * greater than the characters in `text`, false otherwise 397 * @stable ICU 2.0 398 */ 399 inline UBool operator> (const UnicodeString& text) const; 400 401 /** 402 * Less than operator. Performs only bitwise comparison. 403 * @param text The UnicodeString to compare to this one. 404 * @return true if the characters in this are bitwise 405 * less than the characters in `text`, false otherwise 406 * @stable ICU 2.0 407 */ 408 inline UBool operator< (const UnicodeString& text) const; 409 410 /** 411 * Greater than or equal operator. Performs only bitwise comparison. 412 * @param text The UnicodeString to compare to this one. 413 * @return true if the characters in this are bitwise 414 * greater than or equal to the characters in `text`, false otherwise 415 * @stable ICU 2.0 416 */ 417 inline UBool operator>= (const UnicodeString& text) const; 418 419 /** 420 * Less than or equal operator. Performs only bitwise comparison. 
421 * @param text The UnicodeString to compare to this one. 422 * @return true if the characters in this are bitwise 423 * less than or equal to the characters in `text`, false otherwise 424 * @stable ICU 2.0 425 */ 426 inline UBool operator<= (const UnicodeString& text) const; 427 428 /** 429 * Compare the characters bitwise in this UnicodeString to 430 * the characters in `text`. 431 * @param text The UnicodeString to compare to this one. 432 * @return The result of bitwise character comparison: 0 if this 433 * contains the same characters as `text`, -1 if the characters in 434 * this are bitwise less than the characters in `text`, +1 if the 435 * characters in this are bitwise greater than the characters 436 * in `text`. 437 * @stable ICU 2.0 438 */ 439 inline int8_t compare(const UnicodeString& text) const; 440 441 /** 442 * Compare the characters bitwise in the range 443 * [`start`, `start + length`) with the characters 444 * in the **entire string** `text`. 445 * (The parameters "start" and "length" are not applied to the other text "text".) 446 * @param start the offset at which the compare operation begins 447 * @param length the number of characters of text to compare. 448 * @param text the other text to be compared against this string. 449 * @return The result of bitwise character comparison: 0 if this 450 * contains the same characters as `text`, -1 if the characters in 451 * this are bitwise less than the characters in `text`, +1 if the 452 * characters in this are bitwise greater than the characters 453 * in `text`. 454 * @stable ICU 2.0 455 */ 456 inline int8_t compare(int32_t start, 457 int32_t length, 458 const UnicodeString& text) const; 459 460 /** 461 * Compare the characters bitwise in the range 462 * [`start`, `start + length`) with the characters 463 * in `srcText` in the range 464 * [`srcStart`, `srcStart + srcLength`). 
465 * @param start the offset at which the compare operation begins 466 * @param length the number of characters in this to compare. 467 * @param srcText the text to be compared 468 * @param srcStart the offset into `srcText` to start comparison 469 * @param srcLength the number of characters in `srcText` to compare 470 * @return The result of bitwise character comparison: 0 if this 471 * contains the same characters as `srcText`, -1 if the characters in 472 * this are bitwise less than the characters in `srcText`, +1 if the 473 * characters in this are bitwise greater than the characters 474 * in `srcText`. 475 * @stable ICU 2.0 476 */ 477 inline int8_t compare(int32_t start, 478 int32_t length, 479 const UnicodeString& srcText, 480 int32_t srcStart, 481 int32_t srcLength) const; 482 483 /** 484 * Compare the characters bitwise in this UnicodeString with the first 485 * `srcLength` characters in `srcChars`. 486 * @param srcChars The characters to compare to this UnicodeString. 487 * @param srcLength the number of characters in `srcChars` to compare 488 * @return The result of bitwise character comparison: 0 if this 489 * contains the same characters as `srcChars`, -1 if the characters in 490 * this are bitwise less than the characters in `srcChars`, +1 if the 491 * characters in this are bitwise greater than the characters 492 * in `srcChars`. 493 * @stable ICU 2.0 494 */ 495 inline int8_t compare(ConstChar16Ptr srcChars, 496 int32_t srcLength) const; 497 498 /** 499 * Compare the characters bitwise in the range 500 * [`start`, `start + length`) with the first 501 * `length` characters in `srcChars` 502 * @param start the offset at which the compare operation begins 503 * @param length the number of characters to compare. 
504 * @param srcChars the characters to be compared 505 * @return The result of bitwise character comparison: 0 if this 506 * contains the same characters as `srcChars`, -1 if the characters in 507 * this are bitwise less than the characters in `srcChars`, +1 if the 508 * characters in this are bitwise greater than the characters 509 * in `srcChars`. 510 * @stable ICU 2.0 511 */ 512 inline int8_t compare(int32_t start, 513 int32_t length, 514 const char16_t *srcChars) const; 515 516 /** 517 * Compare the characters bitwise in the range 518 * [`start`, `start + length`) with the characters 519 * in `srcChars` in the range 520 * [`srcStart`, `srcStart + srcLength`). 521 * @param start the offset at which the compare operation begins 522 * @param length the number of characters in this to compare 523 * @param srcChars the characters to be compared 524 * @param srcStart the offset into `srcChars` to start comparison 525 * @param srcLength the number of characters in `srcChars` to compare 526 * @return The result of bitwise character comparison: 0 if this 527 * contains the same characters as `srcChars`, -1 if the characters in 528 * this are bitwise less than the characters in `srcChars`, +1 if the 529 * characters in this are bitwise greater than the characters 530 * in `srcChars`. 531 * @stable ICU 2.0 532 */ 533 inline int8_t compare(int32_t start, 534 int32_t length, 535 const char16_t *srcChars, 536 int32_t srcStart, 537 int32_t srcLength) const; 538 539 /** 540 * Compare the characters bitwise in the range 541 * [`start`, `limit`) with the characters 542 * in `srcText` in the range 543 * [`srcStart`, `srcLimit`). 
544 * @param start the offset at which the compare operation begins 545 * @param limit the offset immediately following the compare operation 546 * @param srcText the text to be compared 547 * @param srcStart the offset into `srcText` to start comparison 548 * @param srcLimit the offset into `srcText` to limit comparison 549 * @return The result of bitwise character comparison: 0 if this 550 * contains the same characters as `srcText`, -1 if the characters in 551 * this are bitwise less than the characters in `srcText`, +1 if the 552 * characters in this are bitwise greater than the characters 553 * in `srcText`. 554 * @stable ICU 2.0 555 */ 556 inline int8_t compareBetween(int32_t start, 557 int32_t limit, 558 const UnicodeString& srcText, 559 int32_t srcStart, 560 int32_t srcLimit) const; 561 562 /** 563 * Compare two Unicode strings in code point order. 564 * The result may be different from the results of compare(), operator<, etc. 565 * if supplementary characters are present: 566 * 567 * In UTF-16, supplementary characters (with code points U+10000 and above) are 568 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 569 * which means that they compare as less than some other BMP characters like U+feff. 570 * This function compares Unicode strings in code point order. 571 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 572 * 573 * @param text Another string to compare this one to. 574 * @return a negative/zero/positive integer corresponding to whether 575 * this string is less than/equal to/greater than the second one 576 * in code point order 577 * @stable ICU 2.0 578 */ 579 inline int8_t compareCodePointOrder(const UnicodeString& text) const; 580 581 /** 582 * Compare two Unicode strings in code point order. 583 * The result may be different from the results of compare(), operator<, etc. 
584 * if supplementary characters are present: 585 * 586 * In UTF-16, supplementary characters (with code points U+10000 and above) are 587 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 588 * which means that they compare as less than some other BMP characters like U+feff. 589 * This function compares Unicode strings in code point order. 590 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 591 * 592 * @param start The start offset in this string at which the compare operation begins. 593 * @param length The number of code units from this string to compare. 594 * @param srcText Another string to compare this one to. 595 * @return a negative/zero/positive integer corresponding to whether 596 * this string is less than/equal to/greater than the second one 597 * in code point order 598 * @stable ICU 2.0 599 */ 600 inline int8_t compareCodePointOrder(int32_t start, 601 int32_t length, 602 const UnicodeString& srcText) const; 603 604 /** 605 * Compare two Unicode strings in code point order. 606 * The result may be different from the results of compare(), operator<, etc. 607 * if supplementary characters are present: 608 * 609 * In UTF-16, supplementary characters (with code points U+10000 and above) are 610 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 611 * which means that they compare as less than some other BMP characters like U+feff. 612 * This function compares Unicode strings in code point order. 613 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 614 * 615 * @param start The start offset in this string at which the compare operation begins. 616 * @param length The number of code units from this string to compare. 617 * @param srcText Another string to compare this one to. 
618 * @param srcStart The start offset in that string at which the compare operation begins. 619 * @param srcLength The number of code units from that string to compare. 620 * @return a negative/zero/positive integer corresponding to whether 621 * this string is less than/equal to/greater than the second one 622 * in code point order 623 * @stable ICU 2.0 624 */ 625 inline int8_t compareCodePointOrder(int32_t start, 626 int32_t length, 627 const UnicodeString& srcText, 628 int32_t srcStart, 629 int32_t srcLength) const; 630 631 /** 632 * Compare two Unicode strings in code point order. 633 * The result may be different from the results of compare(), operator<, etc. 634 * if supplementary characters are present: 635 * 636 * In UTF-16, supplementary characters (with code points U+10000 and above) are 637 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 638 * which means that they compare as less than some other BMP characters like U+feff. 639 * This function compares Unicode strings in code point order. 640 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 641 * 642 * @param srcChars A pointer to another string to compare this one to. 643 * @param srcLength The number of code units from that string to compare. 644 * @return a negative/zero/positive integer corresponding to whether 645 * this string is less than/equal to/greater than the second one 646 * in code point order 647 * @stable ICU 2.0 648 */ 649 inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars, 650 int32_t srcLength) const; 651 652 /** 653 * Compare two Unicode strings in code point order. 654 * The result may be different from the results of compare(), operator<, etc. 655 * if supplementary characters are present: 656 * 657 * In UTF-16, supplementary characters (with code points U+10000 and above) are 658 * stored with pairs of surrogate code units. 
These have values from 0xd800 to 0xdfff, 659 * which means that they compare as less than some other BMP characters like U+feff. 660 * This function compares Unicode strings in code point order. 661 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 662 * 663 * @param start The start offset in this string at which the compare operation begins. 664 * @param length The number of code units from this string to compare. 665 * @param srcChars A pointer to another string to compare this one to. 666 * @return a negative/zero/positive integer corresponding to whether 667 * this string is less than/equal to/greater than the second one 668 * in code point order 669 * @stable ICU 2.0 670 */ 671 inline int8_t compareCodePointOrder(int32_t start, 672 int32_t length, 673 const char16_t *srcChars) const; 674 675 /** 676 * Compare two Unicode strings in code point order. 677 * The result may be different from the results of compare(), operator<, etc. 678 * if supplementary characters are present: 679 * 680 * In UTF-16, supplementary characters (with code points U+10000 and above) are 681 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 682 * which means that they compare as less than some other BMP characters like U+feff. 683 * This function compares Unicode strings in code point order. 684 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 685 * 686 * @param start The start offset in this string at which the compare operation begins. 687 * @param length The number of code units from this string to compare. 688 * @param srcChars A pointer to another string to compare this one to. 689 * @param srcStart The start offset in that string at which the compare operation begins. 690 * @param srcLength The number of code units from that string to compare. 
691 * @return a negative/zero/positive integer corresponding to whether 692 * this string is less than/equal to/greater than the second one 693 * in code point order 694 * @stable ICU 2.0 695 */ 696 inline int8_t compareCodePointOrder(int32_t start, 697 int32_t length, 698 const char16_t *srcChars, 699 int32_t srcStart, 700 int32_t srcLength) const; 701 702 /** 703 * Compare two Unicode strings in code point order. 704 * The result may be different from the results of compare(), operator<, etc. 705 * if supplementary characters are present: 706 * 707 * In UTF-16, supplementary characters (with code points U+10000 and above) are 708 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 709 * which means that they compare as less than some other BMP characters like U+feff. 710 * This function compares Unicode strings in code point order. 711 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 712 * 713 * @param start The start offset in this string at which the compare operation begins. 714 * @param limit The offset after the last code unit from this string to compare. 715 * @param srcText Another string to compare this one to. 716 * @param srcStart The start offset in that string at which the compare operation begins. 717 * @param srcLimit The offset after the last code unit from that string to compare. 718 * @return a negative/zero/positive integer corresponding to whether 719 * this string is less than/equal to/greater than the second one 720 * in code point order 721 * @stable ICU 2.0 722 */ 723 inline int8_t compareCodePointOrderBetween(int32_t start, 724 int32_t limit, 725 const UnicodeString& srcText, 726 int32_t srcStart, 727 int32_t srcLimit) const; 728 729 /** 730 * Compare two strings case-insensitively using full case folding. 731 * This is equivalent to this->foldCase(options).compare(text.foldCase(options)). 
732 * 733 * @param text Another string to compare this one to. 734 * @param options A bit set of options: 735 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 736 * Comparison in code unit order with default case folding. 737 * 738 * - U_COMPARE_CODE_POINT_ORDER 739 * Set to choose code point order instead of code unit order 740 * (see u_strCompare for details). 741 * 742 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 743 * 744 * @return A negative, zero, or positive integer indicating the comparison result. 745 * @stable ICU 2.0 746 */ 747 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const; 748 749 /** 750 * Compare two strings case-insensitively using full case folding. 751 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). 752 * 753 * @param start The start offset in this string at which the compare operation begins. 754 * @param length The number of code units from this string to compare. 755 * @param srcText Another string to compare this one to. 756 * @param options A bit set of options: 757 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 758 * Comparison in code unit order with default case folding. 759 * 760 * - U_COMPARE_CODE_POINT_ORDER 761 * Set to choose code point order instead of code unit order 762 * (see u_strCompare for details). 763 * 764 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 765 * 766 * @return A negative, zero, or positive integer indicating the comparison result. 767 * @stable ICU 2.0 768 */ 769 inline int8_t caseCompare(int32_t start, 770 int32_t length, 771 const UnicodeString& srcText, 772 uint32_t options) const; 773 774 /** 775 * Compare two strings case-insensitively using full case folding. 776 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). 777 * 778 * @param start The start offset in this string at which the compare operation begins. 779 * @param length The number of code units from this string to compare. 
780 * @param srcText Another string to compare this one to. 781 * @param srcStart The start offset in that string at which the compare operation begins. 782 * @param srcLength The number of code units from that string to compare. 783 * @param options A bit set of options: 784 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 785 * Comparison in code unit order with default case folding. 786 * 787 * - U_COMPARE_CODE_POINT_ORDER 788 * Set to choose code point order instead of code unit order 789 * (see u_strCompare for details). 790 * 791 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 792 * 793 * @return A negative, zero, or positive integer indicating the comparison result. 794 * @stable ICU 2.0 795 */ 796 inline int8_t caseCompare(int32_t start, 797 int32_t length, 798 const UnicodeString& srcText, 799 int32_t srcStart, 800 int32_t srcLength, 801 uint32_t options) const; 802 803 /** 804 * Compare two strings case-insensitively using full case folding. 805 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). 806 * 807 * @param srcChars A pointer to another string to compare this one to. 808 * @param srcLength The number of code units from that string to compare. 809 * @param options A bit set of options: 810 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 811 * Comparison in code unit order with default case folding. 812 * 813 * - U_COMPARE_CODE_POINT_ORDER 814 * Set to choose code point order instead of code unit order 815 * (see u_strCompare for details). 816 * 817 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 818 * 819 * @return A negative, zero, or positive integer indicating the comparison result. 820 * @stable ICU 2.0 821 */ 822 inline int8_t caseCompare(ConstChar16Ptr srcChars, 823 int32_t srcLength, 824 uint32_t options) const; 825 826 /** 827 * Compare two strings case-insensitively using full case folding. 828 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). 
829 * 830 * @param start The start offset in this string at which the compare operation begins. 831 * @param length The number of code units from this string to compare. 832 * @param srcChars A pointer to another string to compare this one to. 833 * @param options A bit set of options: 834 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 835 * Comparison in code unit order with default case folding. 836 * 837 * - U_COMPARE_CODE_POINT_ORDER 838 * Set to choose code point order instead of code unit order 839 * (see u_strCompare for details). 840 * 841 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 842 * 843 * @return A negative, zero, or positive integer indicating the comparison result. 844 * @stable ICU 2.0 845 */ 846 inline int8_t caseCompare(int32_t start, 847 int32_t length, 848 const char16_t *srcChars, 849 uint32_t options) const; 850 851 /** 852 * Compare two strings case-insensitively using full case folding. 853 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). 854 * 855 * @param start The start offset in this string at which the compare operation begins. 856 * @param length The number of code units from this string to compare. 857 * @param srcChars A pointer to another string to compare this one to. 858 * @param srcStart The start offset in that string at which the compare operation begins. 859 * @param srcLength The number of code units from that string to compare. 860 * @param options A bit set of options: 861 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 862 * Comparison in code unit order with default case folding. 863 * 864 * - U_COMPARE_CODE_POINT_ORDER 865 * Set to choose code point order instead of code unit order 866 * (see u_strCompare for details). 867 * 868 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 869 * 870 * @return A negative, zero, or positive integer indicating the comparison result. 
871 * @stable ICU 2.0 872 */ 873 inline int8_t caseCompare(int32_t start, 874 int32_t length, 875 const char16_t *srcChars, 876 int32_t srcStart, 877 int32_t srcLength, 878 uint32_t options) const; 879 880 /** 881 * Compare two strings case-insensitively using full case folding. 882 * This is equivalent to this->foldCase(options).compareBetween(text.foldCase(options)). 883 * 884 * @param start The start offset in this string at which the compare operation begins. 885 * @param limit The offset after the last code unit from this string to compare. 886 * @param srcText Another string to compare this one to. 887 * @param srcStart The start offset in that string at which the compare operation begins. 888 * @param srcLimit The offset after the last code unit from that string to compare. 889 * @param options A bit set of options: 890 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 891 * Comparison in code unit order with default case folding. 892 * 893 * - U_COMPARE_CODE_POINT_ORDER 894 * Set to choose code point order instead of code unit order 895 * (see u_strCompare for details). 896 * 897 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 898 * 899 * @return A negative, zero, or positive integer indicating the comparison result. 900 * @stable ICU 2.0 901 */ 902 inline int8_t caseCompareBetween(int32_t start, 903 int32_t limit, 904 const UnicodeString& srcText, 905 int32_t srcStart, 906 int32_t srcLimit, 907 uint32_t options) const; 908 909 /** 910 * Determine if this starts with the characters in `text` 911 * @param text The text to match. 912 * @return true if this starts with the characters in `text`, 913 * false otherwise 914 * @stable ICU 2.0 915 */ 916 inline UBool startsWith(const UnicodeString& text) const; 917 918 /** 919 * Determine if this starts with the characters in `srcText` 920 * in the range [`srcStart`, `srcStart + srcLength`). 921 * @param srcText The text to match. 
922 * @param srcStart the offset into `srcText` to start matching 923 * @param srcLength the number of characters in `srcText` to match 924 * @return true if this starts with the characters in `text`, 925 * false otherwise 926 * @stable ICU 2.0 927 */ 928 inline UBool startsWith(const UnicodeString& srcText, 929 int32_t srcStart, 930 int32_t srcLength) const; 931 932 /** 933 * Determine if this starts with the characters in `srcChars` 934 * @param srcChars The characters to match. 935 * @param srcLength the number of characters in `srcChars` 936 * @return true if this starts with the characters in `srcChars`, 937 * false otherwise 938 * @stable ICU 2.0 939 */ 940 inline UBool startsWith(ConstChar16Ptr srcChars, 941 int32_t srcLength) const; 942 943 /** 944 * Determine if this starts with the characters in `srcChars` 945 * in the range [`srcStart`, `srcStart + srcLength`). 946 * @param srcChars The characters to match. 947 * @param srcStart the offset into `srcChars` to start matching 948 * @param srcLength the number of characters in `srcChars` to match 949 * @return true if this starts with the characters in `srcChars`, false otherwise 950 * @stable ICU 2.0 951 */ 952 inline UBool startsWith(const char16_t *srcChars, 953 int32_t srcStart, 954 int32_t srcLength) const; 955 956 /** 957 * Determine if this ends with the characters in `text` 958 * @param text The text to match. 959 * @return true if this ends with the characters in `text`, 960 * false otherwise 961 * @stable ICU 2.0 962 */ 963 inline UBool endsWith(const UnicodeString& text) const; 964 965 /** 966 * Determine if this ends with the characters in `srcText` 967 * in the range [`srcStart`, `srcStart + srcLength`). 968 * @param srcText The text to match.
969 * @param srcStart the offset into `srcText` to start matching 970 * @param srcLength the number of characters in `srcText` to match 971 * @return true if this ends with the characters in `text`, 972 * false otherwise 973 * @stable ICU 2.0 974 */ 975 inline UBool endsWith(const UnicodeString& srcText, 976 int32_t srcStart, 977 int32_t srcLength) const; 978 979 /** 980 * Determine if this ends with the characters in `srcChars` 981 * @param srcChars The characters to match. 982 * @param srcLength the number of characters in `srcChars` 983 * @return true if this ends with the characters in `srcChars`, 984 * false otherwise 985 * @stable ICU 2.0 986 */ 987 inline UBool endsWith(ConstChar16Ptr srcChars, 988 int32_t srcLength) const; 989 990 /** 991 * Determine if this ends with the characters in `srcChars` 992 * in the range [`srcStart`, `srcStart + srcLength`). 993 * @param srcChars The characters to match. 994 * @param srcStart the offset into `srcChars` to start matching 995 * @param srcLength the number of characters in `srcChars` to match 996 * @return true if this ends with the characters in `srcChars`, 997 * false otherwise 998 * @stable ICU 2.0 999 */ 1000 inline UBool endsWith(const char16_t *srcChars, 1001 int32_t srcStart, 1002 int32_t srcLength) const; 1003 1004 1005 /* Searching - bitwise only */ 1006 1007 /** 1008 * Locate in this the first occurrence of the characters in `text`, 1009 * using bitwise comparison. 1010 * @param text The text to search for. 1011 * @return The offset into this of the start of `text`, 1012 * or -1 if not found. 1013 * @stable ICU 2.0 1014 */ 1015 inline int32_t indexOf(const UnicodeString& text) const; 1016 1017 /** 1018 * Locate in this the first occurrence of the characters in `text` 1019 * starting at offset `start`, using bitwise comparison. 1020 * @param text The text to search for. 1021 * @param start The offset at which searching will start.
1022 * @return The offset into this of the start of `text`, 1023 * or -1 if not found. 1024 * @stable ICU 2.0 1025 */ 1026 inline int32_t indexOf(const UnicodeString& text, 1027 int32_t start) const; 1028 1029 /** 1030 * Locate in this the first occurrence in the range 1031 * [`start`, `start + length`) of the characters 1032 * in `text`, using bitwise comparison. 1033 * @param text The text to search for. 1034 * @param start The offset at which searching will start. 1035 * @param length The number of characters to search 1036 * @return The offset into this of the start of `text`, 1037 * or -1 if not found. 1038 * @stable ICU 2.0 1039 */ 1040 inline int32_t indexOf(const UnicodeString& text, 1041 int32_t start, 1042 int32_t length) const; 1043 1044 /** 1045 * Locate in this the first occurrence in the range 1046 * [`start`, `start + length`) of the characters 1047 * in `srcText` in the range 1048 * [`srcStart`, `srcStart + srcLength`), 1049 * using bitwise comparison. 1050 * @param srcText The text to search for. 1051 * @param srcStart the offset into `srcText` at which 1052 * to start matching 1053 * @param srcLength the number of characters in `srcText` to match 1054 * @param start the offset into this at which to start matching 1055 * @param length the number of characters in this to search 1056 * @return The offset into this of the start of `srcText`, 1057 * or -1 if not found. 1058 * @stable ICU 2.0 1059 */ 1060 inline int32_t indexOf(const UnicodeString& srcText, 1061 int32_t srcStart, 1062 int32_t srcLength, 1063 int32_t start, 1064 int32_t length) const; 1065 1066 /** 1067 * Locate in this the first occurrence of the characters in 1068 * `srcChars` 1069 * starting at offset `start`, using bitwise comparison. 1070 * @param srcChars The text to search for.
1071 * @param srcLength the number of characters in `srcChars` to match 1072 * @param start the offset into this at which to start matching 1073 * @return The offset into this of the start of `srcChars`, 1074 * or -1 if not found. 1075 * @stable ICU 2.0 1076 */ 1077 inline int32_t indexOf(const char16_t *srcChars, 1078 int32_t srcLength, 1079 int32_t start) const; 1080 1081 /** 1082 * Locate in this the first occurrence in the range 1083 * [`start`, `start + length`) of the characters 1084 * in `srcChars`, using bitwise comparison. 1085 * @param srcChars The text to search for. 1086 * @param srcLength the number of characters in `srcChars` 1087 * @param start The offset at which searching will start. 1088 * @param length The number of characters to search 1089 * @return The offset into this of the start of `srcChars`, 1090 * or -1 if not found. 1091 * @stable ICU 2.0 1092 */ 1093 inline int32_t indexOf(ConstChar16Ptr srcChars, 1094 int32_t srcLength, 1095 int32_t start, 1096 int32_t length) const; 1097 1098 /** 1099 * Locate in this the first occurrence in the range 1100 * [`start`, `start + length`) of the characters 1101 * in `srcChars` in the range 1102 * [`srcStart`, `srcStart + srcLength`), 1103 * using bitwise comparison. 1104 * @param srcChars The text to search for. 1105 * @param srcStart the offset into `srcChars` at which 1106 * to start matching 1107 * @param srcLength the number of characters in `srcChars` to match 1108 * @param start the offset into this at which to start matching 1109 * @param length the number of characters in this to search 1110 * @return The offset into this of the start of `srcChars`, 1111 * or -1 if not found. 1112 * @stable ICU 2.0 1113 */ 1114 int32_t indexOf(const char16_t *srcChars, 1115 int32_t srcStart, 1116 int32_t srcLength, 1117 int32_t start, 1118 int32_t length) const; 1119 1120 /** 1121 * Locate in this the first occurrence of the BMP code point `c`, 1122 * using bitwise comparison.
1123 * @param c The code unit to search for. 1124 * @return The offset into this of `c`, or -1 if not found. 1125 * @stable ICU 2.0 1126 */ 1127 inline int32_t indexOf(char16_t c) const; 1128 1129 /** 1130 * Locate in this the first occurrence of the code point `c`, 1131 * using bitwise comparison. 1132 * 1133 * @param c The code point to search for. 1134 * @return The offset into this of `c`, or -1 if not found. 1135 * @stable ICU 2.0 1136 */ 1137 inline int32_t indexOf(UChar32 c) const; 1138 1139 /** 1140 * Locate in this the first occurrence of the BMP code point `c`, 1141 * starting at offset `start`, using bitwise comparison. 1142 * @param c The code unit to search for. 1143 * @param start The offset at which searching will start. 1144 * @return The offset into this of `c`, or -1 if not found. 1145 * @stable ICU 2.0 1146 */ 1147 inline int32_t indexOf(char16_t c, 1148 int32_t start) const; 1149 1150 /** 1151 * Locate in this the first occurrence of the code point `c` 1152 * starting at offset `start`, using bitwise comparison. 1153 * 1154 * @param c The code point to search for. 1155 * @param start The offset at which searching will start. 1156 * @return The offset into this of `c`, or -1 if not found. 1157 * @stable ICU 2.0 1158 */ 1159 inline int32_t indexOf(UChar32 c, 1160 int32_t start) const; 1161 1162 /** 1163 * Locate in this the first occurrence of the BMP code point `c` 1164 * in the range [`start`, `start + length`), 1165 * using bitwise comparison. 1166 * @param c The code unit to search for. 1167 * @param start the offset into this at which to start matching 1168 * @param length the number of characters in this to search 1169 * @return The offset into this of `c`, or -1 if not found. 
1170 * @stable ICU 2.0 1171 */ 1172 inline int32_t indexOf(char16_t c, 1173 int32_t start, 1174 int32_t length) const; 1175 1176 /** 1177 * Locate in this the first occurrence of the code point `c` 1178 * in the range [`start`, `start + length`), 1179 * using bitwise comparison. 1180 * 1181 * @param c The code point to search for. 1182 * @param start the offset into this at which to start matching 1183 * @param length the number of characters in this to search 1184 * @return The offset into this of `c`, or -1 if not found. 1185 * @stable ICU 2.0 1186 */ 1187 inline int32_t indexOf(UChar32 c, 1188 int32_t start, 1189 int32_t length) const; 1190 1191 /** 1192 * Locate in this the last occurrence of the characters in `text`, 1193 * using bitwise comparison. 1194 * @param text The text to search for. 1195 * @return The offset into this of the start of `text`, 1196 * or -1 if not found. 1197 * @stable ICU 2.0 1198 */ 1199 inline int32_t lastIndexOf(const UnicodeString& text) const; 1200 1201 /** 1202 * Locate in this the last occurrence of the characters in `text` 1203 * starting at offset `start`, using bitwise comparison. 1204 * @param text The text to search for. 1205 * @param start The offset at which searching will start. 1206 * @return The offset into this of the start of `text`, 1207 * or -1 if not found. 1208 * @stable ICU 2.0 1209 */ 1210 inline int32_t lastIndexOf(const UnicodeString& text, 1211 int32_t start) const; 1212 1213 /** 1214 * Locate in this the last occurrence in the range 1215 * [`start`, `start + length`) of the characters 1216 * in `text`, using bitwise comparison. 1217 * @param text The text to search for. 1218 * @param start The offset at which searching will start. 1219 * @param length The number of characters to search 1220 * @return The offset into this of the start of `text`, 1221 * or -1 if not found. 
1222 * @stable ICU 2.0 1223 */ 1224 inline int32_t lastIndexOf(const UnicodeString& text, 1225 int32_t start, 1226 int32_t length) const; 1227 1228 /** 1229 * Locate in this the last occurrence in the range 1230 * [`start`, `start + length`) of the characters 1231 * in `srcText` in the range 1232 * [`srcStart`, `srcStart + srcLength`), 1233 * using bitwise comparison. 1234 * @param srcText The text to search for. 1235 * @param srcStart the offset into `srcText` at which 1236 * to start matching 1237 * @param srcLength the number of characters in `srcText` to match 1238 * @param start the offset into this at which to start matching 1239 * @param length the number of characters in this to search 1240 * @return The offset into this of the start of `srcText`, 1241 * or -1 if not found. 1242 * @stable ICU 2.0 1243 */ 1244 inline int32_t lastIndexOf(const UnicodeString& srcText, 1245 int32_t srcStart, 1246 int32_t srcLength, 1247 int32_t start, 1248 int32_t length) const; 1249 1250 /** 1251 * Locate in this the last occurrence of the characters in `srcChars` 1252 * starting at offset `start`, using bitwise comparison. 1253 * @param srcChars The text to search for. 1254 * @param srcLength the number of characters in `srcChars` to match 1255 * @param start the offset into this at which to start matching 1256 * @return The offset into this of the start of `srcChars`, 1257 * or -1 if not found. 1258 * @stable ICU 2.0 1259 */ 1260 inline int32_t lastIndexOf(const char16_t *srcChars, 1261 int32_t srcLength, 1262 int32_t start) const; 1263 1264 /** 1265 * Locate in this the last occurrence in the range 1266 * [`start`, `start + length`) of the characters 1267 * in `srcChars`, using bitwise comparison. 1268 * @param srcChars The text to search for. 1269 * @param srcLength the number of characters in `srcChars` 1270 * @param start The offset at which searching will start.
1271 * @param length The number of characters to search 1272 * @return The offset into this of the start of `srcChars`, 1273 * or -1 if not found. 1274 * @stable ICU 2.0 1275 */ 1276 inline int32_t lastIndexOf(ConstChar16Ptr srcChars, 1277 int32_t srcLength, 1278 int32_t start, 1279 int32_t length) const; 1280 1281 /** 1282 * Locate in this the last occurrence in the range 1283 * [`start`, `start + length`) of the characters 1284 * in `srcChars` in the range 1285 * [`srcStart`, `srcStart + srcLength`), 1286 * using bitwise comparison. 1287 * @param srcChars The text to search for. 1288 * @param srcStart the offset into `srcChars` at which 1289 * to start matching 1290 * @param srcLength the number of characters in `srcChars` to match 1291 * @param start the offset into this at which to start matching 1292 * @param length the number of characters in this to search 1293 * @return The offset into this of the start of `srcChars`, 1294 * or -1 if not found. 1295 * @stable ICU 2.0 1296 */ 1297 int32_t lastIndexOf(const char16_t *srcChars, 1298 int32_t srcStart, 1299 int32_t srcLength, 1300 int32_t start, 1301 int32_t length) const; 1302 1303 /** 1304 * Locate in this the last occurrence of the BMP code point `c`, 1305 * using bitwise comparison. 1306 * @param c The code unit to search for. 1307 * @return The offset into this of `c`, or -1 if not found. 1308 * @stable ICU 2.0 1309 */ 1310 inline int32_t lastIndexOf(char16_t c) const; 1311 1312 /** 1313 * Locate in this the last occurrence of the code point `c`, 1314 * using bitwise comparison. 1315 * 1316 * @param c The code point to search for. 1317 * @return The offset into this of `c`, or -1 if not found. 1318 * @stable ICU 2.0 1319 */ 1320 inline int32_t lastIndexOf(UChar32 c) const; 1321 1322 /** 1323 * Locate in this the last occurrence of the BMP code point `c` 1324 * starting at offset `start`, using bitwise comparison. 1325 * @param c The code unit to search for.
1326 * @param start The offset at which searching will start. 1327 * @return The offset into this of `c`, or -1 if not found. 1328 * @stable ICU 2.0 1329 */ 1330 inline int32_t lastIndexOf(char16_t c, 1331 int32_t start) const; 1332 1333 /** 1334 * Locate in this the last occurrence of the code point `c` 1335 * starting at offset `start`, using bitwise comparison. 1336 * 1337 * @param c The code point to search for. 1338 * @param start The offset at which searching will start. 1339 * @return The offset into this of `c`, or -1 if not found. 1340 * @stable ICU 2.0 1341 */ 1342 inline int32_t lastIndexOf(UChar32 c, 1343 int32_t start) const; 1344 1345 /** 1346 * Locate in this the last occurrence of the BMP code point `c` 1347 * in the range [`start`, `start + length`), 1348 * using bitwise comparison. 1349 * @param c The code unit to search for. 1350 * @param start the offset into this at which to start matching 1351 * @param length the number of characters in this to search 1352 * @return The offset into this of `c`, or -1 if not found. 1353 * @stable ICU 2.0 1354 */ 1355 inline int32_t lastIndexOf(char16_t c, 1356 int32_t start, 1357 int32_t length) const; 1358 1359 /** 1360 * Locate in this the last occurrence of the code point `c` 1361 * in the range [`start`, `start + length`), 1362 * using bitwise comparison. 1363 * 1364 * @param c The code point to search for. 1365 * @param start the offset into this at which to start matching 1366 * @param length the number of characters in this to search 1367 * @return The offset into this of `c`, or -1 if not found. 1368 * @stable ICU 2.0 1369 */ 1370 inline int32_t lastIndexOf(UChar32 c, 1371 int32_t start, 1372 int32_t length) const; 1373 1374 1375 /* Character access */ 1376 1377 /** 1378 * Return the code unit at offset `offset`. 1379 * If the offset is not valid (0..length()-1) then U+ffff is returned. 
1380 * @param offset a valid offset into the text 1381 * @return the code unit at offset `offset` 1382 * or 0xffff if the offset is not valid for this string 1383 * @stable ICU 2.0 1384 */ 1385 inline char16_t charAt(int32_t offset) const; 1386 1387 /** 1388 * Return the code unit at offset `offset`. 1389 * If the offset is not valid (0..length()-1) then U+ffff is returned. 1390 * @param offset a valid offset into the text 1391 * @return the code unit at offset `offset`, or 0xffff if the offset is not valid for this string 1392 * @stable ICU 2.0 1393 */ 1394 inline char16_t operator[] (int32_t offset) const; 1395 1396 /** 1397 * Return the code point that contains the code unit 1398 * at offset `offset`. 1399 * If the offset is not valid (0..length()-1) then U+ffff is returned. 1400 * @param offset a valid offset into the text 1401 * that indicates the text offset of any of the code units 1402 * that will be assembled into a code point (21-bit value) and returned 1403 * @return the code point of text at `offset` 1404 * or 0xffff if the offset is not valid for this string 1405 * @stable ICU 2.0 1406 */ 1407 UChar32 char32At(int32_t offset) const; 1408 1409 /** 1410 * Adjust a random-access offset so that 1411 * it points to the beginning of a Unicode character. 1412 * The offset that is passed in points to 1413 * any code unit of a code point, 1414 * while the returned offset will point to the first code unit 1415 * of the same code point. 1416 * In UTF-16, if the input offset points to a second surrogate 1417 * of a surrogate pair, then the returned offset will point 1418 * to the first surrogate. 1419 * @param offset a valid offset into one code point of the text 1420 * @return offset of the first code unit of the same code point 1421 * @see U16_SET_CP_START 1422 * @stable ICU 2.0 1423 */ 1424 int32_t getChar32Start(int32_t offset) const; 1425 1426 /** 1427 * Adjust a random-access offset so that 1428 * it points behind a Unicode character.
1429 * The offset that is passed in points behind 1430 * any code unit of a code point, 1431 * while the returned offset will point behind the last code unit 1432 * of the same code point. 1433 * In UTF-16, if the input offset points behind the first surrogate 1434 * (i.e., to the second surrogate) 1435 * of a surrogate pair, then the returned offset will point 1436 * behind the second surrogate (i.e., to the first code unit after the pair). 1437 * @param offset a valid offset after any code unit of a code point of the text 1438 * @return offset of the first code unit after the same code point 1439 * @see U16_SET_CP_LIMIT 1440 * @stable ICU 2.0 1441 */ 1442 int32_t getChar32Limit(int32_t offset) const; 1443 1444 /** 1445 * Move the code unit index along the string by delta code points. 1446 * Interpret the input index as a code unit-based offset into the string, 1447 * move the index forward or backward by delta code points, and 1448 * return the resulting index. 1449 * The input index should point to the first code unit of a code point, 1450 * if there is more than one. 1451 * 1452 * Both input and output indexes are code unit-based as for all 1453 * string indexes/offsets in ICU (and other libraries, like MBCS char*). 1454 * If delta<0 then the index is moved backward (toward the start of the string). 1455 * If delta>0 then the index is moved forward (toward the end of the string). 1456 * 1457 * This behaves like CharacterIterator::move32(delta, kCurrent). 1458 * 1459 * Behavior for out-of-bounds indexes: 1460 * `moveIndex32` pins the input index to 0..length(), i.e., 1461 * if the input index<0 then it is pinned to 0; 1462 * if it is index>length() then it is pinned to length(). 1463 * Afterwards, the index is moved by `delta` code points 1464 * forward or backward, 1465 * but no further backward than to 0 and no further forward than to length(). 1466 * The resulting index return value will be in between 0 and length(), inclusively.
1467 * 1468 * Examples: 1469 * \code 1470 * // s has code points 'a' U+10000 'b' U+10ffff U+2029 1471 * UnicodeString s(u"a\U00010000b\U0010ffff\u2029"); 1472 * 1473 * // initial index: position of U+10000 1474 * int32_t index=1; 1475 * 1476 * // the following examples will all result in index==4, position of U+10ffff 1477 * 1478 * // skip 2 code points from some position in the string 1479 * index=s.moveIndex32(index, 2); // skips U+10000 and 'b' 1480 * 1481 * // go to the 3rd code point from the start of s (0-based) 1482 * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b' 1483 * 1484 * // go to the next-to-last code point of s 1485 * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff 1486 * \endcode 1487 * 1488 * @param index input code unit index 1489 * @param delta (signed) code point count to move the index forward or backward 1490 * in the string 1491 * @return the resulting code unit index 1492 * @stable ICU 2.0 1493 */ 1494 int32_t moveIndex32(int32_t index, int32_t delta) const; 1495 1496 /* Substring extraction */ 1497 1498 /** 1499 * Copy the characters in the range 1500 * [`start`, `start + length`) into the array `dst`, 1501 * beginning at `dstStart`. 1502 * If the string aliases to `dst` itself as an external buffer, 1503 * then extract() will not copy the contents. 1504 * 1505 * @param start offset of first character which will be copied into the array 1506 * @param length the number of characters to extract 1507 * @param dst array in which to copy characters. The length of `dst` 1508 * must be at least (`dstStart + length`). 1509 * @param dstStart the offset in `dst` where the first character 1510 * will be extracted 1511 * @stable ICU 2.0 1512 */ 1513 inline void extract(int32_t start, 1514 int32_t length, 1515 Char16Ptr dst, 1516 int32_t dstStart = 0) const; 1517 1518 /** 1519 * Copy the contents of the string into dest. 
1520 * This is a convenience function that 1521 * checks if there is enough space in dest, 1522 * extracts the entire string if possible, 1523 * and NUL-terminates dest if possible. 1524 * 1525 * If the string fits into dest but cannot be NUL-terminated 1526 * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING. 1527 * If the string itself does not fit into dest 1528 * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR. 1529 * 1530 * If the string aliases to `dest` itself as an external buffer, 1531 * then extract() will not copy the contents. 1532 * 1533 * @param dest Destination string buffer. 1534 * @param destCapacity Number of char16_ts available at dest. 1535 * @param errorCode ICU error code. 1536 * @return length() 1537 * @stable ICU 2.0 1538 */ 1539 int32_t 1540 extract(Char16Ptr dest, int32_t destCapacity, 1541 UErrorCode &errorCode) const; 1542 1543 /** 1544 * Copy the characters in the range 1545 * [`start`, `start + length`) into the UnicodeString 1546 * `target`. 1547 * @param start offset of first character which will be copied 1548 * @param length the number of characters to extract 1549 * @param target UnicodeString into which to copy characters. 1550 * @stable ICU 2.0 1551 */ 1552 inline void extract(int32_t start, 1553 int32_t length, 1554 UnicodeString& target) const; 1555 1556 /** 1557 * Copy the characters in the range [`start`, `limit`) 1558 * into the array `dst`, beginning at `dstStart`. 1559 * @param start offset of first character which will be copied into the array 1560 * @param limit offset immediately following the last character to be copied 1561 * @param dst array in which to copy characters. The length of `dst` 1562 * must be at least (`dstStart + (limit - start)`). 
* @param dstStart the offset in `dst` at which the first extracted character
   *                 is stored (defaults to 0)
   * @stable ICU 2.0
   */
  inline void extractBetween(int32_t start,
              int32_t limit,
              char16_t *dst,
              int32_t dstStart = 0) const;

  /**
   * Copy the characters in the range [`start`, `limit`)
   * into the UnicodeString `target`. Replaceable API.
   * @param start offset of the first character to be copied
   * @param limit offset immediately following the last character to be copied
   * @param target UnicodeString into which the characters are copied.
   * @stable ICU 2.0
   */
  virtual void extractBetween(int32_t start,
              int32_t limit,
              UnicodeString& target) const override;

  /**
   * Copy the characters in the range
   * [`start`, `start + startLength`) into an array of characters.
   * All characters must be invariant (see utypes.h).
   * Use US_INV as the last, signature-distinguishing parameter.
   *
   * This function does not write any more than `targetCapacity`
   * characters but returns the length of the entire output string
   * so that one can allocate a larger buffer and call the function again
   * if necessary.
   * The output string is NUL-terminated if possible.
   *
   * @param start offset of the first character to be copied
   * @param startLength the number of characters to extract
   * @param target the target buffer for extraction, can be nullptr
   *               if targetCapacity is 0
   * @param targetCapacity the length of the target buffer
   * @param inv Signature-distinguishing parameter, use US_INV.
* @return the output string length, not including the terminating NUL
   * @stable ICU 3.2
   */
  int32_t extract(int32_t start,
           int32_t startLength,
           char *target,
           int32_t targetCapacity,
           enum EInvariant inv) const;

#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION

  /**
   * Copy the characters in the range
   * [`start`, `start + startLength`) into an array of characters
   * in the platform's default codepage.
   * This function does not write any more than `targetLength`
   * characters but returns the length of the entire output string
   * so that one can allocate a larger buffer and call the function again
   * if necessary.
   * The output string is NUL-terminated if possible.
   *
   * @param start offset of the first character to be copied
   * @param startLength the number of characters to extract
   * @param target the target buffer for extraction
   * @param targetLength the length of the target buffer.
   *        If `target` is nullptr, then the number of bytes required for
   *        `target` is returned.
   * @return the output string length, not including the terminating NUL
   * @stable ICU 2.0
   */
  int32_t extract(int32_t start,
           int32_t startLength,
           char *target,
           uint32_t targetLength) const;

#endif

#if !UCONFIG_NO_CONVERSION

  /**
   * Copy the characters in the range
   * [`start`, `start + startLength`) into an array of characters
   * in a specified codepage.
   * The output string is NUL-terminated.
   *
   * Recommendation: For invariant-character strings use
   * extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
   * because it avoids object code dependencies of UnicodeString on
   * the conversion code.
*
   * @param start offset of the first character to be copied
   * @param startLength the number of characters to extract
   * @param target the target buffer for extraction
   * @param codepage the desired codepage for the characters. A null pointer (0,
   * which is also the default argument) has
   * the special meaning of the default codepage.
   * If `codepage` is an empty string (`""`),
   * then a simple conversion is performed on the codepage-invariant
   * subset ("invariant characters") of the platform encoding. See utypes.h.
   * If `target` is nullptr, then the number of bytes required for
   * `target` is returned. It is assumed that the target is big enough
   * to fit all of the characters.
   * @return the output string length, not including the terminating NUL
   * @stable ICU 2.0
   */
  inline int32_t extract(int32_t start,
                 int32_t startLength,
                 char* target,
                 const char* codepage = nullptr) const;

  /**
   * Copy the characters in the range
   * [`start`, `start + startLength`) into an array of characters
   * in a specified codepage.
   * This function does not write any more than `targetLength`
   * characters but returns the length of the entire output string
   * so that one can allocate a larger buffer and call the function again
   * if necessary.
   * The output string is NUL-terminated if possible.
   *
   * Recommendation: For invariant-character strings use
   * extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
   * because it avoids object code dependencies of UnicodeString on
   * the conversion code.
   *
   * @param start offset of the first character to be copied
   * @param startLength the number of characters to extract
   * @param target the target buffer for extraction
   * @param targetLength the length of the target buffer
   * @param codepage the desired codepage for the characters.
* A null pointer (0) has
   * the special meaning of the default codepage.
   * If `codepage` is an empty string (`""`),
   * then a simple conversion is performed on the codepage-invariant
   * subset ("invariant characters") of the platform encoding. See utypes.h.
   * If `target` is nullptr, then the number of bytes required for
   * `target` is returned.
   * @return the output string length, not including the terminating NUL
   * @stable ICU 2.0
   */
  int32_t extract(int32_t start,
           int32_t startLength,
           char *target,
           uint32_t targetLength,
           const char *codepage) const;

  /**
   * Convert the UnicodeString into a codepage string using an existing UConverter.
   * The output string is NUL-terminated if possible.
   *
   * This function avoids the overhead of opening and closing a converter if
   * multiple strings are extracted.
   *
   * @param dest destination string buffer, can be nullptr if destCapacity==0
   * @param destCapacity the number of chars available at dest
   * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
   *        or nullptr for the default converter
   * @param errorCode normal ICU error code
   * @return the length of the output string, not counting the terminating NUL;
   *         if the length is greater than destCapacity, then the string will not fit
   *         and a buffer of the indicated length would need to be passed in
   * @stable ICU 2.0
   */
  int32_t extract(char *dest, int32_t destCapacity,
                  UConverter *cnv,
                  UErrorCode &errorCode) const;

#endif

  /**
   * Create a temporary substring for the specified range.
   * Unlike the substring constructor and setTo() functions,
   * the object returned here will be a read-only alias (using getBuffer())
   * rather than copying the text.
* As a result, this substring operation is much faster but requires
   * that the original string not be modified or deleted during the lifetime
   * of the returned substring object.
   * @param start offset of the first character visible in the substring
   *              (defaults to 0)
   * @param length length of the substring (defaults to INT32_MAX)
   * @return a read-only alias UnicodeString object for the substring
   * @stable ICU 4.4
   */
  UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;

  /**
   * Create a temporary substring for the specified range.
   * Same as tempSubString(start, length) except that the substring range
   * is specified as a (start, limit) pair (with an exclusive limit index)
   * rather than a (start, length) pair.
   * @param start offset of the first character visible in the substring
   * @param limit offset immediately following the last character visible in the substring
   *              (defaults to INT32_MAX)
   * @return a read-only alias UnicodeString object for the substring
   * @stable ICU 4.4
   */
  inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;

  /**
   * Convert the UnicodeString to UTF-8 and write the result
   * to a ByteSink. This is called by toUTF8String().
   * Unpaired surrogates are replaced with U+FFFD.
   * Calls u_strToUTF8WithSub().
   *
   * @param sink A ByteSink to which the UTF-8 version of the string is written.
   *             sink.Flush() is called at the end.
   * @stable ICU 4.2
   * @see toUTF8String
   */
  void toUTF8(ByteSink &sink) const;

  /**
   * Convert the UnicodeString to UTF-8 and append the result
   * to a standard string.
   * Unpaired surrogates are replaced with U+FFFD.
   * Calls toUTF8().
   *
   * @tparam StringClass A std::string or a std::u8string (or a compatible type)
   * @param result A std::string or a std::u8string (or a compatible object)
   *     to which the UTF-8 version of the string is appended.
1778 * @return The string object. 1779 * @stable ICU 4.2 1780 * @see toUTF8 1781 */ 1782 template<typename StringClass> 1783 StringClass &toUTF8String(StringClass &result) const { 1784 StringByteSink<StringClass> sbs(&result, length()); 1785 toUTF8(sbs); 1786 return result; 1787 } 1788 1789 #ifndef U_HIDE_DRAFT_API 1790 /** 1791 * Convert the UnicodeString to a UTF-8 string. 1792 * Unpaired surrogates are replaced with U+FFFD. 1793 * Calls toUTF8(). 1794 * 1795 * @tparam StringClass A std::string or a std::u8string (or a compatible type) 1796 * @return A std::string or a std::u8string (or a compatible object) 1797 * with the UTF-8 version of the string. 1798 * @draft ICU 78 1799 * @see toUTF8 1800 */ 1801 template<typename StringClass> 1802 StringClass toUTF8String() const { 1803 StringClass result; 1804 StringByteSink<StringClass> sbs(&result, length()); 1805 toUTF8(sbs); 1806 return result; 1807 } 1808 #endif // U_HIDE_DRAFT_API 1809 1810 /** 1811 * Convert the UnicodeString to UTF-32. 1812 * Unpaired surrogates are replaced with U+FFFD. 1813 * Calls u_strToUTF32WithSub(). 1814 * 1815 * @param utf32 destination string buffer, can be nullptr if capacity==0 1816 * @param capacity the number of UChar32s available at utf32 1817 * @param errorCode Standard ICU error code. Its input value must 1818 * pass the U_SUCCESS() test, or else the function returns 1819 * immediately. Check for U_FAILURE() on output or use with 1820 * function chaining. (See User Guide for details.) 1821 * @return The length of the UTF-32 string. 1822 * @see fromUTF32 1823 * @stable ICU 4.2 1824 */ 1825 int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const; 1826 1827 /* Length operations */ 1828 1829 /** 1830 * Return the length of the UnicodeString object. 1831 * The length is the number of char16_t code units are in the UnicodeString. 1832 * If you want the number of code points, please use countChar32(). 
* @return the length of the UnicodeString object
   * @see countChar32
   * @stable ICU 2.0
   */
  inline int32_t length() const;

  /**
   * Count Unicode code points in the `length` char16_t code units of the string
   * that begin at `start`.
   * A code point may occupy either one or two char16_t code units.
   * Counting code points involves reading all code units.
   *
   * This function is basically the inverse of moveIndex32().
   *
   * @param start the index of the first code unit to check (defaults to 0)
   * @param length the number of char16_t code units to check
   *               (defaults to INT32_MAX)
   * @return the number of code points in the specified code units
   * @see length
   * @stable ICU 2.0
   */
  int32_t
  countChar32(int32_t start=0, int32_t length=INT32_MAX) const;

  /**
   * Check if the `length` char16_t code units of the string that begin at `start`
   * contain more Unicode code points than a certain number.
   * This is more efficient than counting all code points in this part of the string
   * and comparing that number with a threshold.
   * This function may not need to scan the string at all if the length
   * falls within a certain range, and
   * never needs to count more than 'number+1' code points.
   * Logically equivalent to (countChar32(start, length)>number).
   * A Unicode code point may occupy either one or two char16_t code units.
   *
   * @param start the index of the first code unit to check (0 for the entire string)
   * @param length the number of char16_t code units to check
   *               (use INT32_MAX for the entire string; remember that start/length
   *                values are pinned)
   * @param number The number of code points in the (sub)string is compared against
   *               the 'number' parameter.
   * @return Boolean value for whether the string contains more Unicode code points
   *         than 'number'. Same as (u_countChar32(s, length)>number).
* @see countChar32
   * @see u_strHasMoreChar32Than
   * @stable ICU 2.4
   */
  UBool
  hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;

  /**
   * Determine if this string is empty.
   * @return true if this string contains 0 characters, false otherwise.
   * @stable ICU 2.0
   */
  inline UBool isEmpty() const;

  /**
   * Return the capacity of the internal buffer of the UnicodeString object.
   * This is useful together with the getBuffer functions.
   * See there for details.
   *
   * @return the number of char16_ts available in the internal buffer
   * @see getBuffer
   * @stable ICU 2.0
   */
  inline int32_t getCapacity() const;

  /* Other operations */

  /**
   * Generate a hash code for this object.
   * @return The hash code of this UnicodeString.
   * @stable ICU 2.0
   */
  inline int32_t hashCode() const;

  /**
   * Determine if this object is bogus, that is, does not contain a valid string.
   * A bogus string has no value. It is different from an empty string,
   * although in both cases isEmpty() returns true and length() returns 0.
   * setToBogus() and isBogus() can be used to indicate that no string value is available.
   * For a bogus string, getBuffer() and getTerminatedBuffer() return nullptr, and
   * length() returns 0.
   *
   * @return true if the string is bogus/invalid, false otherwise
   * @see setToBogus()
   * @stable ICU 2.0
   */
  inline UBool isBogus() const;

#ifndef U_HIDE_DRAFT_API
private:
  // These type aliases are private; there is no guarantee that they will remain
  // aliases to the same types in subsequent versions of ICU.
  // Callers should therefore not spell out the underlying types.
  // Note that whether `std::u16string_view::const_iterator` is a pointer or a
  // class that models contiguous_iterator is platform-dependent.
1928 using unspecified_iterator = std::u16string_view::const_iterator; 1929 using unspecified_reverse_iterator = std::u16string_view::const_reverse_iterator; 1930 1931 public: 1932 /** 1933 * @return an iterator to the first code unit in this string. 1934 * The iterator may be a pointer or a contiguous-iterator object. 1935 * @draft ICU 78 1936 */ 1937 unspecified_iterator begin() const { return std::u16string_view(*this).begin(); } 1938 /** 1939 * @return an iterator to just past the last code unit in this string. 1940 * The iterator may be a pointer or a contiguous-iterator object. 1941 * @draft ICU 78 1942 */ 1943 unspecified_iterator end() const { return std::u16string_view(*this).end(); } 1944 /** 1945 * @return a reverse iterator to the last code unit in this string. 1946 * The iterator may be a pointer or a contiguous-iterator object. 1947 * @draft ICU 78 1948 */ 1949 unspecified_reverse_iterator rbegin() const { return std::u16string_view(*this).rbegin(); } 1950 /** 1951 * @return a reverse iterator to just before the first code unit in this string. 1952 * The iterator may be a pointer or a contiguous-iterator object. 1953 * @draft ICU 78 1954 */ 1955 unspecified_reverse_iterator rend() const { return std::u16string_view(*this).rend(); } 1956 #endif // U_HIDE_DRAFT_API 1957 1958 //======================================== 1959 // Write operations 1960 //======================================== 1961 1962 /* Assignment operations */ 1963 1964 /** 1965 * Assignment operator. Replace the characters in this UnicodeString 1966 * with the characters from `srcText`. 1967 * 1968 * Starting with ICU 2.4, the assignment operator and the copy constructor 1969 * allocate a new buffer and copy the buffer contents even for readonly aliases. 1970 * By contrast, the fastCopyFrom() function implements the old, 1971 * more efficient but less safe behavior 1972 * of making this string also a readonly alias to the same buffer. 
*
   * If the source object has an "open" buffer from getBuffer(minCapacity),
   * then the copy is an empty string.
   *
   * @param srcText The text whose characters replace the contents of this string
   * @return a reference to this
   * @stable ICU 2.0
   * @see fastCopyFrom
   */
  UnicodeString &operator=(const UnicodeString &srcText);

  /**
   * Almost the same as the assignment operator.
   * Replace the characters in this UnicodeString
   * with the characters from `src`.
   *
   * This function works the same as the assignment operator
   * for all strings except for ones that are readonly aliases.
   *
   * Starting with ICU 2.4, the assignment operator and the copy constructor
   * allocate a new buffer and copy the buffer contents even for readonly aliases.
   * This function implements the old, more efficient but less safe behavior
   * of making this string also a readonly alias to the same buffer.
   *
   * The fastCopyFrom function must be used only if it is known that the lifetime of
   * this UnicodeString does not exceed the lifetime of the aliased buffer
   * including its contents, for example for strings from resource bundles
   * or aliases to string constants.
   *
   * If the source object has an "open" buffer from getBuffer(minCapacity),
   * then the copy is an empty string.
   *
   * @param src The text whose characters replace the contents of this string.
   * @return a reference to this
   * @stable ICU 2.4
   */
  UnicodeString &fastCopyFrom(const UnicodeString &src);

  /**
   * Assignment operator. Replaces the characters in this UnicodeString
   * with a copy of the characters from the `src`
   * which is, or which is implicitly convertible to,
   * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
   *
   * @param src The string view containing the characters to copy.
2018 * @return a reference to this 2019 * @stable ICU 76 2020 */ 2021 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>> 2022 inline UnicodeString &operator=(const S &src) { 2023 unBogus(); 2024 return doReplace(0, length(), internal::toU16StringView(src)); 2025 } 2026 2027 /** 2028 * Move assignment operator; might leave src in bogus state. 2029 * This string will have the same contents and state that the source string had. 2030 * The behavior is undefined if *this and src are the same object. 2031 * @param src source string 2032 * @return *this 2033 * @stable ICU 56 2034 */ 2035 UnicodeString &operator=(UnicodeString &&src) noexcept; 2036 2037 /** 2038 * Swap strings. 2039 * @param other other string 2040 * @stable ICU 56 2041 */ 2042 void swap(UnicodeString &other) noexcept; 2043 2044 /** 2045 * Non-member UnicodeString swap function. 2046 * @param s1 will get s2's contents and state 2047 * @param s2 will get s1's contents and state 2048 * @stable ICU 56 2049 */ 2050 friend inline void U_EXPORT2 2051 swap(UnicodeString &s1, UnicodeString &s2) noexcept { 2052 s1.swap(s2); 2053 } 2054 2055 /** 2056 * Assignment operator. Replace the characters in this UnicodeString 2057 * with the code unit `ch`. 2058 * @param ch the code unit to replace 2059 * @return a reference to this 2060 * @stable ICU 2.0 2061 */ 2062 inline UnicodeString& operator= (char16_t ch); 2063 2064 /** 2065 * Assignment operator. Replace the characters in this UnicodeString 2066 * with the code point `ch`. 2067 * @param ch the code point to replace 2068 * @return a reference to this 2069 * @stable ICU 2.0 2070 */ 2071 inline UnicodeString& operator= (UChar32 ch); 2072 2073 /** 2074 * Set the text in the UnicodeString object to the characters 2075 * in `srcText` in the range 2076 * [`srcStart`, `srcText.length()`). 2077 * `srcText` is not modified. 
* @param srcText the source for the new characters
   * @param srcStart the offset into `srcText` where new characters
   * will be obtained
   * @return a reference to this
   * @stable ICU 2.2
   */
  inline UnicodeString& setTo(const UnicodeString& srcText,
               int32_t srcStart);

  /**
   * Set the text in the UnicodeString object to the characters
   * in `srcText` in the range
   * [`srcStart`, `srcStart + srcLength`).
   * `srcText` is not modified.
   * @param srcText the source for the new characters
   * @param srcStart the offset into `srcText` where new characters
   * will be obtained
   * @param srcLength the number of characters of `srcText` to use,
   * starting at `srcStart`
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& setTo(const UnicodeString& srcText,
               int32_t srcStart,
               int32_t srcLength);

  /**
   * Set the text in the UnicodeString object to the characters in
   * `srcText`.
   * `srcText` is not modified.
   * @param srcText the source for the new characters
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& setTo(const UnicodeString& srcText);

  /**
   * Set the characters in the UnicodeString object to the characters
   * in `srcChars`. `srcChars` is not modified.
   * @param srcChars the source for the new characters
   * @param srcLength the number of Unicode characters in `srcChars`.
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& setTo(const char16_t *srcChars,
               int32_t srcLength);

  /**
   * Set the characters in the UnicodeString object to the code unit
   * `srcChar`.
* @param srcChar the code unit which becomes the UnicodeString's character
   * content
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& setTo(char16_t srcChar);

  /**
   * Set the characters in the UnicodeString object to the code point
   * `srcChar`.
   * @param srcChar the code point which becomes the UnicodeString's character
   * content
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& setTo(UChar32 srcChar);

  /**
   * Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
   * The text will be used for the UnicodeString object, but
   * it will not be released when the UnicodeString is destroyed.
   * This has copy-on-write semantics:
   * When the string is modified, then the buffer is first copied into
   * newly allocated memory.
   * The aliased buffer is never modified.
   *
   * In an assignment to another UnicodeString, when using the copy constructor
   * or the assignment operator, the text will be copied.
   * When using fastCopyFrom(), the text will be aliased again,
   * so that both strings then alias the same readonly-text.
   *
   * @param isTerminated specifies if `text` is `NUL`-terminated.
   *                     This must be true if `textLength==-1`.
   * @param text The characters to alias for the UnicodeString.
   * @param textLength The number of Unicode characters in `text` to alias.
   *                   If -1, then this function will determine the length
   *                   by calling `u_strlen()`.
   * @return a reference to this
   * @stable ICU 2.0
   */
  UnicodeString &setTo(UBool isTerminated,
                       ConstChar16Ptr text,
                       int32_t textLength);

  /**
   * Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
   * The text will be used for the UnicodeString object, but
   * it will not be released when the UnicodeString is destroyed.
* This has write-through semantics:
   * For as long as the capacity of the buffer is sufficient, write operations
   * will directly affect the buffer. When more capacity is necessary, then
   * a new buffer will be allocated and the contents copied as with regularly
   * constructed strings.
   * In an assignment to another UnicodeString, the buffer will be copied.
   * The extract(Char16Ptr dst) function detects whether the dst pointer is the same
   * as the string buffer itself and will in this case not copy the contents.
   *
   * @param buffer The characters to alias for the UnicodeString.
   * @param buffLength The number of Unicode characters in `buffer` to alias.
   * @param buffCapacity The size of `buffer` in char16_ts.
   * @return a reference to this
   * @stable ICU 2.0
   */
  UnicodeString &setTo(char16_t *buffer,
                       int32_t buffLength,
                       int32_t buffCapacity);

  /**
   * Make this UnicodeString object invalid.
   * The string will test true with isBogus().
   *
   * A bogus string has no value. It is different from an empty string.
   * It can be used to indicate that no string value is available.
   * getBuffer() and getTerminatedBuffer() return nullptr, and
   * length() returns 0.
   *
   * This utility function is used throughout the UnicodeString
   * implementation to indicate that a UnicodeString operation failed,
   * and may be used in other functions,
   * especially but not exclusively when such functions do not
   * take a UErrorCode for simplicity.
   *
   * The following methods, and no others, will clear a string object's bogus flag:
   * - remove()
   * - remove(0, INT32_MAX)
   * - truncate(0)
   * - operator=() (assignment operator)
   * - setTo(...)
   *
   * The simplest way to turn a bogus string into an empty one
   * is to use the remove() function.
* Examples of functions that are equivalent to "set to empty string":
   * \code
   * if(s.isBogus()) {
   *   s.remove();             // set to an empty string (remove all), or
   *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or
   *   s.truncate(0);          // set to an empty string (complete truncation), or
   *   s=UnicodeString();      // assign an empty string, or
   *   s.setTo((UChar32)-1);   // set to a pseudo code point that is out of range, or
   *   s.setTo(u"", 0);        // set to an empty C Unicode string
   * }
   * \endcode
   *
   * @see isBogus()
   * @stable ICU 2.0
   */
  void setToBogus();

  /**
   * Set the character at the specified offset to the specified character.
   * @param offset A valid offset into the text of the character to set
   * @param ch The new character (a single char16_t code unit)
   * @return A reference to this
   * @stable ICU 2.0
   */
  UnicodeString& setCharAt(int32_t offset,
               char16_t ch);


  /* Append operations */

  /**
   * Append operator. Append the code unit `ch` to the UnicodeString
   * object.
   * @param ch the code unit to be appended
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& operator+= (char16_t ch);

  /**
   * Append operator. Append the code point `ch` to the UnicodeString
   * object.
   * @param ch the code point to be appended
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& operator+= (UChar32 ch);

  /**
   * Append operator. Append the characters in `srcText` to the
   * UnicodeString object. `srcText` is not modified.
   * @param srcText the source for the new characters
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& operator+= (const UnicodeString& srcText);

  /**
   * Append operator.
Appends the characters in `src` 2278 * which is, or which is implicitly convertible to, 2279 * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view, 2280 * to the UnicodeString object. 2281 * 2282 * @param src the source for the new characters 2283 * @return a reference to this 2284 * @stable ICU 76 2285 */ 2286 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>> 2287 inline UnicodeString& operator+=(const S &src) { 2288 return doAppend(internal::toU16StringView(src)); 2289 } 2290 2291 /** 2292 * Append the characters 2293 * in `srcText` in the range 2294 * [`srcStart`, `srcStart + srcLength`) to the 2295 * UnicodeString object at offset `start`. `srcText` 2296 * is not modified. 2297 * @param srcText the source for the new characters 2298 * @param srcStart the offset into `srcText` where new characters 2299 * will be obtained 2300 * @param srcLength the number of characters in `srcText` in 2301 * the append string 2302 * @return a reference to this 2303 * @stable ICU 2.0 2304 */ 2305 inline UnicodeString& append(const UnicodeString& srcText, 2306 int32_t srcStart, 2307 int32_t srcLength); 2308 2309 /** 2310 * Append the characters in `srcText` to the UnicodeString object. 2311 * `srcText` is not modified. 2312 * @param srcText the source for the new characters 2313 * @return a reference to this 2314 * @stable ICU 2.0 2315 */ 2316 inline UnicodeString& append(const UnicodeString& srcText); 2317 2318 /** 2319 * Append the characters in `srcChars` in the range 2320 * [`srcStart`, `srcStart + srcLength`) to the UnicodeString 2321 * object at offset 2322 * `start`. `srcChars` is not modified. 
2323 * @param srcChars the source for the new characters 2324 * @param srcStart the offset into `srcChars` where new characters 2325 * will be obtained 2326 * @param srcLength the number of characters in `srcChars` in 2327 * the append string; can be -1 if `srcChars` is NUL-terminated 2328 * @return a reference to this 2329 * @stable ICU 2.0 2330 */ 2331 inline UnicodeString& append(const char16_t *srcChars, 2332 int32_t srcStart, 2333 int32_t srcLength); 2334 2335 /** 2336 * Append the characters in `srcChars` to the UnicodeString object. 2337 * `srcChars` is not modified. 2338 * @param srcChars the source for the new characters 2339 * @param srcLength the number of Unicode characters in `srcChars`; 2340 * can be -1 if `srcChars` is NUL-terminated 2341 * @return a reference to this 2342 * @stable ICU 2.0 2343 */ 2344 inline UnicodeString& append(ConstChar16Ptr srcChars, 2345 int32_t srcLength); 2346 2347 /** 2348 * Appends the characters in `src` 2349 * which is, or which is implicitly convertible to, 2350 * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view, 2351 * to the UnicodeString object. 2352 * 2353 * @param src the source for the new characters 2354 * @return a reference to this 2355 * @stable ICU 76 2356 */ 2357 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>> 2358 inline UnicodeString& append(const S &src) { 2359 return doAppend(internal::toU16StringView(src)); 2360 } 2361 2362 /** 2363 * Append the code unit `srcChar` to the UnicodeString object. 2364 * @param srcChar the code unit to append 2365 * @return a reference to this 2366 * @stable ICU 2.0 2367 */ 2368 inline UnicodeString& append(char16_t srcChar); 2369 2370 /** 2371 * Append the code point `srcChar` to the UnicodeString object. 
2372 * @param srcChar the code point to append 2373 * @return a reference to this 2374 * @stable ICU 2.0 2375 */ 2376 UnicodeString& append(UChar32 srcChar); 2377 2378 #ifndef U_HIDE_DRAFT_API 2379 /** 2380 * Appends the code unit `c` to the UnicodeString object. 2381 * Same as append(c) except does not return *this. 2382 * 2383 * @param c the code unit to append 2384 * @draft ICU 78 2385 */ 2386 inline void push_back(char16_t c) { append(c); } 2387 #endif // U_HIDE_DRAFT_API 2388 2389 /* Insert operations */ 2390 2391 /** 2392 * Insert the characters in `srcText` in the range 2393 * [`srcStart`, `srcStart + srcLength`) into the UnicodeString 2394 * object at offset `start`. `srcText` is not modified. 2395 * @param start the offset where the insertion begins 2396 * @param srcText the source for the new characters 2397 * @param srcStart the offset into `srcText` where new characters 2398 * will be obtained 2399 * @param srcLength the number of characters in `srcText` in 2400 * the insert string 2401 * @return a reference to this 2402 * @stable ICU 2.0 2403 */ 2404 inline UnicodeString& insert(int32_t start, 2405 const UnicodeString& srcText, 2406 int32_t srcStart, 2407 int32_t srcLength); 2408 2409 /** 2410 * Insert the characters in `srcText` into the UnicodeString object 2411 * at offset `start`. `srcText` is not modified. 2412 * @param start the offset where the insertion begins 2413 * @param srcText the source for the new characters 2414 * @return a reference to this 2415 * @stable ICU 2.0 2416 */ 2417 inline UnicodeString& insert(int32_t start, 2418 const UnicodeString& srcText); 2419 2420 /** 2421 * Insert the characters in `srcChars` in the range 2422 * [`srcStart`, `srcStart + srcLength`) into the UnicodeString 2423 * object at offset `start`. `srcChars` is not modified. 
2424 * @param start the offset at which the insertion begins 2425 * @param srcChars the source for the new characters 2426 * @param srcStart the offset into `srcChars` where new characters 2427 * will be obtained 2428 * @param srcLength the number of characters in `srcChars` 2429 * in the insert string 2430 * @return a reference to this 2431 * @stable ICU 2.0 2432 */ 2433 inline UnicodeString& insert(int32_t start, 2434 const char16_t *srcChars, 2435 int32_t srcStart, 2436 int32_t srcLength); 2437 2438 /** 2439 * Insert the characters in `srcChars` into the UnicodeString object 2440 * at offset `start`. `srcChars` is not modified. 2441 * @param start the offset where the insertion begins 2442 * @param srcChars the source for the new characters 2443 * @param srcLength the number of Unicode characters in srcChars. 2444 * @return a reference to this 2445 * @stable ICU 2.0 2446 */ 2447 inline UnicodeString& insert(int32_t start, 2448 ConstChar16Ptr srcChars, 2449 int32_t srcLength); 2450 2451 /** 2452 * Insert the code unit `srcChar` into the UnicodeString object at 2453 * offset `start`. 2454 * @param start the offset at which the insertion occurs 2455 * @param srcChar the code unit to insert 2456 * @return a reference to this 2457 * @stable ICU 2.0 2458 */ 2459 inline UnicodeString& insert(int32_t start, 2460 char16_t srcChar); 2461 2462 /** 2463 * Insert the code point `srcChar` into the UnicodeString object at 2464 * offset `start`. 2465 * @param start the offset at which the insertion occurs 2466 * @param srcChar the code point to insert 2467 * @return a reference to this 2468 * @stable ICU 2.0 2469 */ 2470 inline UnicodeString& insert(int32_t start, 2471 UChar32 srcChar); 2472 2473 2474 /* Replace operations */ 2475 2476 /** 2477 * Replace the characters in the range 2478 * [`start`, `start + length`) with the characters in 2479 * `srcText` in the range 2480 * [`srcStart`, `srcStart + srcLength`). 2481 * `srcText` is not modified. 
2482 * @param start the offset at which the replace operation begins 2483 * @param length the number of characters to replace. The character at 2484 * `start + length` is not modified. 2485 * @param srcText the source for the new characters 2486 * @param srcStart the offset into `srcText` where new characters 2487 * will be obtained 2488 * @param srcLength the number of characters in `srcText` in 2489 * the replace string 2490 * @return a reference to this 2491 * @stable ICU 2.0 2492 */ 2493 inline UnicodeString& replace(int32_t start, 2494 int32_t length, 2495 const UnicodeString& srcText, 2496 int32_t srcStart, 2497 int32_t srcLength); 2498 2499 /** 2500 * Replace the characters in the range 2501 * [`start`, `start + length`) 2502 * with the characters in `srcText`. `srcText` is 2503 * not modified. 2504 * @param start the offset at which the replace operation begins 2505 * @param length the number of characters to replace. The character at 2506 * `start + length` is not modified. 2507 * @param srcText the source for the new characters 2508 * @return a reference to this 2509 * @stable ICU 2.0 2510 */ 2511 inline UnicodeString& replace(int32_t start, 2512 int32_t length, 2513 const UnicodeString& srcText); 2514 2515 /** 2516 * Replace the characters in the range 2517 * [`start`, `start + length`) with the characters in 2518 * `srcChars` in the range 2519 * [`srcStart`, `srcStart + srcLength`). `srcChars` 2520 * is not modified. 2521 * @param start the offset at which the replace operation begins 2522 * @param length the number of characters to replace. The character at 2523 * `start + length` is not modified. 
2524 * @param srcChars the source for the new characters 2525 * @param srcStart the offset into `srcChars` where new characters 2526 * will be obtained 2527 * @param srcLength the number of characters in `srcChars` 2528 * in the replace string 2529 * @return a reference to this 2530 * @stable ICU 2.0 2531 */ 2532 inline UnicodeString& replace(int32_t start, 2533 int32_t length, 2534 const char16_t *srcChars, 2535 int32_t srcStart, 2536 int32_t srcLength); 2537 2538 /** 2539 * Replace the characters in the range 2540 * [`start`, `start + length`) with the characters in 2541 * `srcChars`. `srcChars` is not modified. 2542 * @param start the offset at which the replace operation begins 2543 * @param length number of characters to replace. The character at 2544 * `start + length` is not modified. 2545 * @param srcChars the source for the new characters 2546 * @param srcLength the number of Unicode characters in srcChars 2547 * @return a reference to this 2548 * @stable ICU 2.0 2549 */ 2550 inline UnicodeString& replace(int32_t start, 2551 int32_t length, 2552 ConstChar16Ptr srcChars, 2553 int32_t srcLength); 2554 2555 /** 2556 * Replace the characters in the range 2557 * [`start`, `start + length`) with the code unit 2558 * `srcChar`. 2559 * @param start the offset at which the replace operation begins 2560 * @param length the number of characters to replace. The character at 2561 * `start + length` is not modified. 2562 * @param srcChar the new code unit 2563 * @return a reference to this 2564 * @stable ICU 2.0 2565 */ 2566 inline UnicodeString& replace(int32_t start, 2567 int32_t length, 2568 char16_t srcChar); 2569 2570 /** 2571 * Replace the characters in the range 2572 * [`start`, `start + length`) with the code point 2573 * `srcChar`. 2574 * @param start the offset at which the replace operation begins 2575 * @param length the number of characters to replace. The character at 2576 * `start + length` is not modified. 
2577 * @param srcChar the new code point 2578 * @return a reference to this 2579 * @stable ICU 2.0 2580 */ 2581 UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar); 2582 2583 /** 2584 * Replace the characters in the range [`start`, `limit`) 2585 * with the characters in `srcText`. `srcText` is not modified. 2586 * @param start the offset at which the replace operation begins 2587 * @param limit the offset immediately following the replace range 2588 * @param srcText the source for the new characters 2589 * @return a reference to this 2590 * @stable ICU 2.0 2591 */ 2592 inline UnicodeString& replaceBetween(int32_t start, 2593 int32_t limit, 2594 const UnicodeString& srcText); 2595 2596 /** 2597 * Replace the characters in the range [`start`, `limit`) 2598 * with the characters in `srcText` in the range 2599 * [`srcStart`, `srcLimit`). `srcText` is not modified. 2600 * @param start the offset at which the replace operation begins 2601 * @param limit the offset immediately following the replace range 2602 * @param srcText the source for the new characters 2603 * @param srcStart the offset into `srcText` where new characters 2604 * will be obtained 2605 * @param srcLimit the offset immediately following the range to copy 2606 * in `srcText` 2607 * @return a reference to this 2608 * @stable ICU 2.0 2609 */ 2610 inline UnicodeString& replaceBetween(int32_t start, 2611 int32_t limit, 2612 const UnicodeString& srcText, 2613 int32_t srcStart, 2614 int32_t srcLimit); 2615 2616 /** 2617 * Replace a substring of this object with the given text. 2618 * @param start the beginning index, inclusive; `0 <= start <= limit`. 2619 * @param limit the ending index, exclusive; `start <= limit <= length()`.
2620 * @param text the text to replace characters `start` to `limit - 1` 2621 * @stable ICU 2.0 2622 */ 2623 virtual void handleReplaceBetween(int32_t start, 2624 int32_t limit, 2625 const UnicodeString& text) override; 2626 2627 /** 2628 * Replaceable API 2629 * @return true if it has MetaData 2630 * @stable ICU 2.4 2631 */ 2632 virtual UBool hasMetaData() const override; 2633 2634 /** 2635 * Copy a substring of this object, retaining attribute (out-of-band) 2636 * information. This method is used to duplicate or reorder substrings. 2637 * The destination index must not overlap the source range. 2638 * 2639 * @param start the beginning index, inclusive; `0 <= start <= limit`. 2640 * @param limit the ending index, exclusive; `start <= limit <= length()`. 2641 * @param dest the destination index. The characters from 2642 * `start..limit-1` will be copied to `dest`. 2643 * Implementations of this method may assume that `dest <= start || 2644 * dest >= limit`. 2645 * @stable ICU 2.0 2646 */ 2647 virtual void copy(int32_t start, int32_t limit, int32_t dest) override; 2648 2649 /* Search and replace operations */ 2650 2651 /** 2652 * Replace all occurrences of characters in oldText with the characters 2653 * in newText 2654 * @param oldText the text containing the search text 2655 * @param newText the text containing the replacement text 2656 * @return a reference to this 2657 * @stable ICU 2.0 2658 */ 2659 inline UnicodeString& findAndReplace(const UnicodeString& oldText, 2660 const UnicodeString& newText); 2661 2662 /** 2663 * Replace all occurrences of characters in oldText with characters 2664 * in newText 2665 * in the range [`start`, `start + length`). 
2666 * @param start the start of the range in which replace will be performed 2667 * @param length the length of the range in which replace will be performed 2668 * @param oldText the text containing the search text 2669 * @param newText the text containing the replacement text 2670 * @return a reference to this 2671 * @stable ICU 2.0 2672 */ 2673 inline UnicodeString& findAndReplace(int32_t start, 2674 int32_t length, 2675 const UnicodeString& oldText, 2676 const UnicodeString& newText); 2677 2678 /** 2679 * Replace all occurrences of characters in oldText in the range 2680 * [`oldStart`, `oldStart + oldLength`) with the characters 2681 * in newText in the range 2682 * [`newStart`, `newStart + newLength`) 2683 * in the range [`start`, `start + length`). 2684 * @param start the start of the range in which replace will be performed 2685 * @param length the length of the range in which replace will be performed 2686 * @param oldText the text containing the search text 2687 * @param oldStart the start of the search range in `oldText` 2688 * @param oldLength the length of the search range in `oldText` 2689 * @param newText the text containing the replacement text 2690 * @param newStart the start of the replacement range in `newText` 2691 * @param newLength the length of the replacement range in `newText` 2692 * @return a reference to this 2693 * @stable ICU 2.0 2694 */ 2695 UnicodeString& findAndReplace(int32_t start, 2696 int32_t length, 2697 const UnicodeString& oldText, 2698 int32_t oldStart, 2699 int32_t oldLength, 2700 const UnicodeString& newText, 2701 int32_t newStart, 2702 int32_t newLength); 2703 2704 2705 /* Remove operations */ 2706 2707 /** 2708 * Removes all characters from the UnicodeString object and clears the bogus flag. 2709 * This is the UnicodeString equivalent of std::string’s clear().
2710 * 2711 * @return a reference to this 2712 * @see setToBogus 2713 * @stable ICU 2.0 2714 */ 2715 inline UnicodeString& remove(); 2716 2717 /** 2718 * Remove the characters in the range 2719 * [`start`, `start + length`) from the UnicodeString object. 2720 * @param start the offset of the first character to remove 2721 * @param length the number of characters to remove 2722 * @return a reference to this 2723 * @stable ICU 2.0 2724 */ 2725 inline UnicodeString& remove(int32_t start, 2726 int32_t length = static_cast<int32_t>(INT32_MAX)); 2727 2728 /** 2729 * Remove the characters in the range 2730 * [`start`, `limit`) from the UnicodeString object. 2731 * @param start the offset of the first character to remove 2732 * @param limit the offset immediately following the range to remove 2733 * @return a reference to this 2734 * @stable ICU 2.0 2735 */ 2736 inline UnicodeString& removeBetween(int32_t start, 2737 int32_t limit = static_cast<int32_t>(INT32_MAX)); 2738 2739 /** 2740 * Retain only the characters in the range 2741 * [`start`, `limit`) from the UnicodeString object. 2742 * Removes characters before `start` and at and after `limit`. 2743 * @param start the offset of the first character to retain 2744 * @param limit the offset immediately following the range to retain 2745 * @return a reference to this 2746 * @stable ICU 4.4 2747 */ 2748 inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX); 2749 2750 /* Length operations */ 2751 2752 /** 2753 * Pad the start of this UnicodeString with the character `padChar`. 2754 * If the length of this UnicodeString is less than targetLength, 2755 * targetLength - length() copies of padChar will be added to the 2756 * beginning of this UnicodeString. 2757 * @param targetLength the desired length of the string 2758 * @param padChar the character to use for padding. Defaults to 2759 * space (U+0020) 2760 * @return true if the text was padded, false otherwise.
2761 * @stable ICU 2.0 2762 */ 2763 UBool padLeading(int32_t targetLength, 2764 char16_t padChar = 0x0020); 2765 2766 /** 2767 * Pad the end of this UnicodeString with the character `padChar`. 2768 * If the length of this UnicodeString is less than targetLength, 2769 * targetLength - length() copies of padChar will be added to the 2770 * end of this UnicodeString. 2771 * @param targetLength the desired length of the string 2772 * @param padChar the character to use for padding. Defaults to 2773 * space (U+0020) 2774 * @return true if the text was padded, false otherwise. 2775 * @stable ICU 2.0 2776 */ 2777 UBool padTrailing(int32_t targetLength, 2778 char16_t padChar = 0x0020); 2779 2780 /** 2781 * Truncate this UnicodeString to the `targetLength`. 2782 * @param targetLength the desired length of this UnicodeString. 2783 * @return true if the text was truncated, false otherwise 2784 * @stable ICU 2.0 2785 */ 2786 inline UBool truncate(int32_t targetLength); 2787 2788 /** 2789 * Trims leading and trailing whitespace from this UnicodeString. 2790 * @return a reference to this 2791 * @stable ICU 2.0 2792 */ 2793 UnicodeString& trim(); 2794 2795 /* Miscellaneous operations */ 2796 2797 /** 2798 * Reverse this UnicodeString in place. 2799 * @return a reference to this 2800 * @stable ICU 2.0 2801 */ 2802 inline UnicodeString& reverse(); 2803 2804 /** 2805 * Reverse the range [`start`, `start + length`) in 2806 * this UnicodeString. 2807 * @param start the start of the range to reverse 2808 * @param length the number of characters to reverse 2809 * @return a reference to this 2810 * @stable ICU 2.0 2811 */ 2812 inline UnicodeString& reverse(int32_t start, 2813 int32_t length); 2814 2815 /** 2816 * Convert the characters in this to UPPER CASE following the conventions of 2817 * the default locale. 2818 * @return A reference to this.
2819 * @stable ICU 2.0 2820 */ 2821 UnicodeString& toUpper(); 2822 2823 /** 2824 * Convert the characters in this to UPPER CASE following the conventions of 2825 * a specific locale. 2826 * @param locale The locale containing the conventions to use. 2827 * @return A reference to this. 2828 * @stable ICU 2.0 2829 */ 2830 UnicodeString& toUpper(const Locale& locale); 2831 2832 /** 2833 * Convert the characters in this to lower case following the conventions of 2834 * the default locale. 2835 * @return A reference to this. 2836 * @stable ICU 2.0 2837 */ 2838 UnicodeString& toLower(); 2839 2840 /** 2841 * Convert the characters in this to lower case following the conventions of 2842 * a specific locale. 2843 * @param locale The locale containing the conventions to use. 2844 * @return A reference to this. 2845 * @stable ICU 2.0 2846 */ 2847 UnicodeString& toLower(const Locale& locale); 2848 2849 #if !UCONFIG_NO_BREAK_ITERATION 2850 2851 /** 2852 * Titlecase this string, convenience function using the default locale. 2853 * 2854 * Casing is locale-dependent and context-sensitive. 2855 * Titlecasing uses a break iterator to find the first characters of words 2856 * that are to be titlecased. It titlecases those characters and lowercases 2857 * all others. 2858 * 2859 * The titlecase break iterator can be provided to customize for arbitrary 2860 * styles, using rules and dictionaries beyond the standard iterators. 2861 * It may be more efficient to always provide an iterator to avoid 2862 * opening and closing one for each string. 2863 * If the break iterator passed in is null, the default Unicode algorithm 2864 * will be used to determine the titlecase positions. 2865 * 2866 * This function uses only the setText(), first() and next() methods of the 2867 * provided break iterator. 2868 * 2869 * @param titleIter A break iterator to find the first characters of words 2870 * that are to be titlecased. 
2871 * If none is provided (0), then a standard titlecase 2872 * break iterator is opened. 2873 * Otherwise the provided iterator is set to the string's text. 2874 * @return A reference to this. 2875 * @stable ICU 2.1 2876 */ 2877 UnicodeString &toTitle(BreakIterator *titleIter); 2878 2879 /** 2880 * Titlecase this string. 2881 * 2882 * Casing is locale-dependent and context-sensitive. 2883 * Titlecasing uses a break iterator to find the first characters of words 2884 * that are to be titlecased. It titlecases those characters and lowercases 2885 * all others. 2886 * 2887 * The titlecase break iterator can be provided to customize for arbitrary 2888 * styles, using rules and dictionaries beyond the standard iterators. 2889 * It may be more efficient to always provide an iterator to avoid 2890 * opening and closing one for each string. 2891 * If the break iterator passed in is null, the default Unicode algorithm 2892 * will be used to determine the titlecase positions. 2893 * 2894 * This function uses only the setText(), first() and next() methods of the 2895 * provided break iterator. 2896 * 2897 * @param titleIter A break iterator to find the first characters of words 2898 * that are to be titlecased. 2899 * If none is provided (0), then a standard titlecase 2900 * break iterator is opened. 2901 * Otherwise the provided iterator is set to the string's text. 2902 * @param locale The locale to consider. 2903 * @return A reference to this. 2904 * @stable ICU 2.1 2905 */ 2906 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale); 2907 2908 /** 2909 * Titlecase this string, with options. 2910 * 2911 * Casing is locale-dependent and context-sensitive. 2912 * Titlecasing uses a break iterator to find the first characters of words 2913 * that are to be titlecased. It titlecases those characters and lowercases 2914 * all others. (This can be modified with options.) 
2915 * 2916 * The titlecase break iterator can be provided to customize for arbitrary 2917 * styles, using rules and dictionaries beyond the standard iterators. 2918 * It may be more efficient to always provide an iterator to avoid 2919 * opening and closing one for each string. 2920 * If the break iterator passed in is null, the default Unicode algorithm 2921 * will be used to determine the titlecase positions. 2922 * 2923 * This function uses only the setText(), first() and next() methods of the 2924 * provided break iterator. 2925 * 2926 * @param titleIter A break iterator to find the first characters of words 2927 * that are to be titlecased. 2928 * If none is provided (0), then a standard titlecase 2929 * break iterator is opened. 2930 * Otherwise the provided iterator is set to the string's text. 2931 * @param locale The locale to consider. 2932 * @param options Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE, 2933 * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, 2934 * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. 2935 * @return A reference to this. 2936 * @stable ICU 3.8 2937 */ 2938 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options); 2939 2940 #endif 2941 2942 /** 2943 * Case-folds the characters in this string. 2944 * 2945 * Case-folding is locale-independent and not context-sensitive, 2946 * but there is an option for whether to include or exclude mappings for dotted I 2947 * and dotless i that are marked with 'T' in CaseFolding.txt. 2948 * 2949 * The result may be longer or shorter than the original. 2950 * 2951 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I 2952 * @return A reference to this. 
2953 * @stable ICU 2.0 2954 */ 2955 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/); 2956 2957 //======================================== 2958 // Access to the internal buffer 2959 //======================================== 2960 2961 /** 2962 * Get a read/write pointer to the internal buffer. 2963 * The buffer is guaranteed to be large enough for at least minCapacity char16_ts, 2964 * writable, and is still owned by the UnicodeString object. 2965 * Calls to getBuffer(minCapacity) must not be nested, and 2966 * must be matched with calls to releaseBuffer(newLength). 2967 * If the string buffer was read-only or shared, 2968 * then it will be reallocated and copied. 2969 * 2970 * An attempted nested call will return 0, and will not further modify the 2971 * state of the UnicodeString object. 2972 * It also returns 0 if the string is bogus. 2973 * 2974 * The actual capacity of the string buffer may be larger than minCapacity. 2975 * getCapacity() returns the actual capacity. 2976 * For many operations, the full capacity should be used to avoid reallocations. 2977 * 2978 * While the buffer is "open" between getBuffer(minCapacity) 2979 * and releaseBuffer(newLength), the following applies: 2980 * - The string length is set to 0. 2981 * - Any read API call on the UnicodeString object will behave like on a 0-length string. 2982 * - Any write API call on the UnicodeString object is disallowed and will have no effect. 2983 * - You can read from and write to the returned buffer. 2984 * - The previous string contents will still be in the buffer; 2985 * if you want to use it, then you need to call length() before getBuffer(minCapacity). 2986 * If the length() was greater than minCapacity, then any contents after minCapacity 2987 * may be lost. 2988 * The buffer contents is not NUL-terminated by getBuffer(). 2989 * If length() < getCapacity() then you can terminate it by writing a NUL 2990 * at index length(). 
2991 * - You must call releaseBuffer(newLength) before and in order to 2992 * return to normal UnicodeString operation. 2993 * 2994 * @param minCapacity the minimum number of char16_ts that are to be available 2995 * in the buffer, starting at the returned pointer; 2996 * default to the current string capacity if minCapacity==-1 2997 * @return a writable pointer to the internal string buffer, 2998 * or nullptr if an error occurs (nested calls, out of memory) 2999 * 3000 * @see releaseBuffer 3001 * @see getTerminatedBuffer() 3002 * @stable ICU 2.0 3003 */ 3004 char16_t *getBuffer(int32_t minCapacity); 3005 3006 /** 3007 * Release a read/write buffer on a UnicodeString object with an 3008 * "open" getBuffer(minCapacity). 3009 * This function must be called in a matched pair with getBuffer(minCapacity). 3010 * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open". 3011 * 3012 * It will set the string length to newLength, at most to the current capacity. 3013 * If newLength==-1 then it will set the length according to the 3014 * first NUL in the buffer, or to the capacity if there is no NUL. 3015 * 3016 * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation. 3017 * 3018 * @param newLength the new length of the UnicodeString object; 3019 * defaults to the current capacity if newLength is greater than that; 3020 * if newLength==-1, it defaults to u_strlen(buffer) but not more than 3021 * the current capacity of the string 3022 * 3023 * @see getBuffer(int32_t minCapacity) 3024 * @stable ICU 2.0 3025 */ 3026 void releaseBuffer(int32_t newLength=-1); 3027 3028 /** 3029 * Get a read-only pointer to the internal buffer. 3030 * This can be called at any time on a valid UnicodeString. 3031 * 3032 * It returns 0 if the string is bogus, or 3033 * during an "open" getBuffer(minCapacity). 3034 * 3035 * It can be called as many times as desired. 
3036 * The pointer that it returns will remain valid until the UnicodeString object is modified, 3037 * at which time the pointer is semantically invalidated and must not be used any more. 3038 * 3039 * The capacity of the buffer can be determined with getCapacity(). 3040 * The part after length() may or may not be initialized and valid, 3041 * depending on the history of the UnicodeString object. 3042 * 3043 * The buffer contents is (probably) not NUL-terminated. 3044 * You can check if it is with 3045 * `(s.length() < s.getCapacity() && buffer[s.length()]==0)`. 3046 * (See getTerminatedBuffer().) 3047 * 3048 * The buffer may reside in read-only memory. Its contents must not 3049 * be modified. 3050 * 3051 * @return a read-only pointer to the internal string buffer, 3052 * or nullptr if the string is empty or bogus 3053 * 3054 * @see getBuffer(int32_t minCapacity) 3055 * @see getTerminatedBuffer() 3056 * @stable ICU 2.0 3057 */ 3058 inline const char16_t *getBuffer() const; 3059 3060 /** 3061 * Get a read-only pointer to the internal buffer, 3062 * making sure that it is NUL-terminated. 3063 * This can be called at any time on a valid UnicodeString. 3064 * 3065 * It returns 0 if the string is bogus, or 3066 * during an "open" getBuffer(minCapacity), or if the buffer cannot 3067 * be NUL-terminated (because memory allocation failed). 3068 * 3069 * It can be called as many times as desired. 3070 * The pointer that it returns will remain valid until the UnicodeString object is modified, 3071 * at which time the pointer is semantically invalidated and must not be used any more. 3072 * 3073 * The capacity of the buffer can be determined with getCapacity(). 3074 * The part after length()+1 may or may not be initialized and valid, 3075 * depending on the history of the UnicodeString object. 3076 * 3077 * The buffer contents is guaranteed to be NUL-terminated. 3078 * getTerminatedBuffer() may reallocate the buffer if a terminating NUL 3079 * is written. 
3080 * For this reason, this function is not const, unlike getBuffer(). 3081 * Note that a UnicodeString may also contain NUL characters as part of its contents. 3082 * 3083 * The buffer may reside in read-only memory. Its contents must not 3084 * be modified. 3085 * 3086 * @return a read-only pointer to the internal string buffer, 3087 * or 0 if the string is empty or bogus 3088 * 3089 * @see getBuffer(int32_t minCapacity) 3090 * @see getBuffer() 3091 * @stable ICU 2.2 3092 */ 3093 const char16_t *getTerminatedBuffer(); 3094 3095 /** 3096 * Converts to a std::u16string_view. 3097 * The view is built from getBuffer() and length(); no characters are copied. 3098 * @return a string view of the contents of this string 3099 * @stable ICU 76 3100 */ 3101 inline operator std::u16string_view() const { 3102 return {getBuffer(), static_cast<std::u16string_view::size_type>(length())}; 3103 } 3104 3105 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) 3106 /** 3107 * Converts to a std::wstring_view. 3108 * The view reinterprets the char16_t pointer from getBuffer() as wchar_t; no characters are copied. 3109 * Note: This should remain draft until C++ standard plans 3110 * about char16_t vs. wchar_t become clearer. 3111 * 3112 * @return a string view of the contents of this string 3113 * @stable ICU 76 3114 */ 3115 inline operator std::wstring_view() const { 3116 const char16_t *p = getBuffer(); 3117 #ifdef U_ALIASING_BARRIER 3118 U_ALIASING_BARRIER(p); 3119 #endif 3120 /* Named cast, matching the u16string_view conversion above. */ return { reinterpret_cast<const wchar_t *>(p), static_cast<std::wstring_view::size_type>(length()) }; 3121 } 3122 #endif // U_SIZEOF_WCHAR_T 3123 3124 //======================================== 3125 // Constructors 3126 //======================================== 3127 3128 /** Construct an empty UnicodeString.
3129 * @stable ICU 2.0 3130 */ 3131 inline UnicodeString(); 3132 3133 /** 3134 * Construct a UnicodeString with capacity to hold `capacity` char16_ts 3135 * @param capacity the number of char16_ts this UnicodeString should hold 3136 * before a resize is necessary; if count is greater than 0 and count 3137 * code points c take up more space than capacity, then capacity is adjusted 3138 * accordingly. 3139 * @param c is used to initially fill the string 3140 * @param count specifies how many code points c are to be written in the 3141 * string 3142 * @stable ICU 2.0 3143 */ 3144 UnicodeString(int32_t capacity, UChar32 c, int32_t count); 3145 3146 /** 3147 * Single char16_t (code unit) constructor. 3148 * 3149 * It is recommended to mark this constructor "explicit" by 3150 * `-DUNISTR_FROM_CHAR_EXPLICIT=explicit` 3151 * on the compiler command line or similar. 3152 * @param ch the character to place in the UnicodeString 3153 * @stable ICU 2.0 3154 */ 3155 UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch); 3156 3157 /** 3158 * Single UChar32 (code point) constructor. 3159 * 3160 * It is recommended to mark this constructor "explicit" by 3161 * `-DUNISTR_FROM_CHAR_EXPLICIT=explicit` 3162 * on the compiler command line or similar. 3163 * @param ch the character to place in the UnicodeString 3164 * @stable ICU 2.0 3165 */ 3166 UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch); 3167 3168 #ifdef U_HIDE_DRAFT_API 3169 /** 3170 * char16_t* constructor. 3171 * 3172 * It is recommended to mark this constructor "explicit" by 3173 * `-DUNISTR_FROM_STRING_EXPLICIT=explicit` 3174 * on the compiler command line or similar. 3175 * 3176 * Note, for string literals: 3177 * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time 3178 * length determination: 3179 * \code 3180 * UnicodeString str(u"literal"); 3181 * if (str == u"other literal") { ... } 3182 * \endcode 3183 * 3184 * @param text The characters to place in the UnicodeString. 
`text` 3185 * must be NUL (U+0000) terminated. 3186 * @stable ICU 2.0 3187 */ 3188 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text) : 3189 UnicodeString(text, -1) {} 3190 #endif // U_HIDE_DRAFT_API 3191 3192 #if !U_CHAR16_IS_TYPEDEF && \ 3193 (defined(U_HIDE_DRAFT_API) || (defined(_LIBCPP_VERSION) && _LIBCPP_VERSION >= 180000)) 3194 /** 3195 * uint16_t * constructor. 3196 * Delegates via UnicodeString(ConstChar16Ptr(text), -1). 3197 * 3198 * It is recommended to mark this constructor "explicit" by 3199 * `-DUNISTR_FROM_STRING_EXPLICIT=explicit` 3200 * on the compiler command line or similar. 3201 * 3202 * Note, for string literals: 3203 * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time 3204 * length determination: 3205 * \code 3206 * UnicodeString str(u"literal"); 3207 * if (str == u"other literal") { ... } 3208 * \endcode 3209 * 3210 * @param text NUL-terminated UTF-16 string 3211 * @stable ICU 59 3212 */ 3213 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) : 3214 UnicodeString(ConstChar16Ptr(text), -1) {} 3215 #endif 3216 3217 #if defined(U_HIDE_DRAFT_API) && (U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)) 3218 /** 3219 * wchar_t * constructor. 3220 * (Only defined if U_HIDE_DRAFT_API is defined and U_SIZEOF_WCHAR_T==2.) 3221 * Delegates via UnicodeString(ConstChar16Ptr(text), -1). 3222 * 3223 * It is recommended to mark this constructor "explicit" by 3224 * `-DUNISTR_FROM_STRING_EXPLICIT=explicit` 3225 * on the compiler command line or similar. 3226 * 3227 * Note, for string literals: 3228 * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time 3229 * length determination: 3230 * \code 3231 * UnicodeString str(u"literal"); 3232 * if (str == u"other literal") { ...
} 3233 * \endcode 3234 * 3235 * @param text NUL-terminated UTF-16 string 3236 * @stable ICU 59 3237 */ 3238 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) : 3239 UnicodeString(ConstChar16Ptr(text), -1) {} 3240 #endif 3241 3242 /** 3243 * nullptr_t constructor. 3244 * Effectively the same as the default constructor, makes an empty string object. 3245 * 3246 * It is recommended to mark this constructor "explicit" by 3247 * `-DUNISTR_FROM_STRING_EXPLICIT=explicit` 3248 * on the compiler command line or similar. 3249 * @param text nullptr 3250 * @stable ICU 59 3251 */ 3252 UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text); 3253 3254 /** 3255 * char16_t* constructor. 3256 * 3257 * Note, for string literals: 3258 * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time 3259 * length determination: 3260 * \code 3261 * UnicodeString str(u"literal"); 3262 * if (str == u"other literal") { ... } 3263 * \endcode 3264 * 3265 * @param text The characters to place in the UnicodeString. 3266 * @param textLength The number of Unicode characters in `text` 3267 * to copy. 3268 * @stable ICU 2.0 3269 */ 3270 UnicodeString(const char16_t *text, 3271 int32_t textLength); 3272 3273 #if !U_CHAR16_IS_TYPEDEF 3274 /** 3275 * uint16_t * constructor. 3276 * (Only defined if U_CHAR16_IS_TYPEDEF is false.) 3277 * Delegates to UnicodeString(const char16_t *, int32_t) after wrapping `text` in ConstChar16Ptr. 3278 * Note, for string literals: 3279 * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time 3280 * length determination: 3281 * \code 3282 * UnicodeString str(u"literal"); 3283 * if (str == u"other literal") { ... } 3284 * \endcode 3285 * 3286 * @param text UTF-16 string 3287 * @param textLength string length 3288 * @stable ICU 59 3289 */ 3290 UnicodeString(const uint16_t *text, int32_t textLength) : 3291 UnicodeString(ConstChar16Ptr(text), textLength) {} 3292 #endif 3293 3294 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) 3295 /** 3296 * wchar_t * constructor.
3297 * (Only defined if U_SIZEOF_WCHAR_T==2.) 3298 * Delegates to UnicodeString(const char16_t *, int32_t). 3299 * 3300 * Note, for string literals: 3301 * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time 3302 * length determination: 3303 * \code 3304 * UnicodeString str(u"literal"); 3305 * if (str == u"other literal") { ... } 3306 * \endcode 3307 * 3308 * @param text UTF-16 string 3309 * @param textLength string length 3310 * @stable ICU 59 3311 */ 3312 UnicodeString(const wchar_t *text, int32_t textLength) : 3313 UnicodeString(ConstChar16Ptr(text), textLength) {} 3314 #endif 3315 3316 /** 3317 * nullptr_t constructor. 3318 * Effectively the same as the default constructor, makes an empty string object. 3319 * @param text nullptr 3320 * @param textLength ignored 3321 * @stable ICU 59 3322 */ 3323 inline UnicodeString(const std::nullptr_t text, int32_t textLength); 3324 3325 /** 3326 * Constructor from `text` 3327 * which is, or which is implicitly convertible to, 3328 * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view. 3329 * The string is bogus if the string view is too long. 3330 * 3331 * If you need a UnicodeString but need not copy the string view contents, 3332 * then you can call the UnicodeString::readOnlyAlias() function instead of this constructor. 3333 * 3334 * @param text UTF-16 string 3335 * @stable ICU 76 3336 */ 3337 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>> 3338 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const S &text) { 3339 fUnion.fFields.fLengthAndFlags = kShortString; 3340 doAppend(internal::toU16StringViewNullable(text)); 3341 } 3342 3343 /** 3344 * Readonly-aliasing char16_t* constructor. 3345 * The text will be used for the UnicodeString object, but 3346 * it will not be released when the UnicodeString is destroyed. 3347 * This has copy-on-write semantics: 3348 * When the string is modified, then the buffer is first copied into 3349 * newly allocated memory. 
3350 * The aliased buffer is never modified. 3351 * 3352 * In an assignment to another UnicodeString, when using the copy constructor 3353 * or the assignment operator, the text will be copied. 3354 * When using fastCopyFrom(), the text will be aliased again, 3355 * so that both strings then alias the same readonly-text. 3356 * 3357 * Note, for string literals: 3358 * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time 3359 * length determination: 3360 * \code 3361 * UnicodeString alias = UnicodeString::readOnlyAlias(u"literal"); 3362 * if (str == u"other literal") { ... } 3363 * \endcode 3364 * 3365 * @param isTerminated specifies if `text` is `NUL`-terminated. 3366 * This must be true if `textLength==-1`. 3367 * @param text The characters to alias for the UnicodeString. 3368 * @param textLength The number of Unicode characters in `text` to alias. 3369 * If -1, then this constructor will determine the length 3370 * by calling `u_strlen()`. 3371 * @stable ICU 2.0 3372 */ 3373 UnicodeString(UBool isTerminated, 3374 ConstChar16Ptr text, 3375 int32_t textLength); 3376 3377 /** 3378 * Writable-aliasing char16_t* constructor. 3379 * The text will be used for the UnicodeString object, but 3380 * it will not be released when the UnicodeString is destroyed. 3381 * This has write-through semantics: 3382 * For as long as the capacity of the buffer is sufficient, write operations 3383 * will directly affect the buffer. When more capacity is necessary, then 3384 * a new buffer will be allocated and the contents copied as with regularly 3385 * constructed strings. 3386 * In an assignment to another UnicodeString, the buffer will be copied. 3387 * The extract(Char16Ptr dst) function detects whether the dst pointer is the same 3388 * as the string buffer itself and will in this case not copy the contents. 3389 * 3390 * @param buffer The characters to alias for the UnicodeString. 
3391 * @param buffLength The number of Unicode characters in `buffer` to alias. 3392 * @param buffCapacity The size of `buffer` in char16_ts. 3393 * @stable ICU 2.0 3394 */ 3395 UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity); 3396 3397 #if !U_CHAR16_IS_TYPEDEF 3398 /** 3399 * Writable-aliasing uint16_t * constructor. 3400 * Delegates to UnicodeString(const char16_t *, int32_t, int32_t). 3401 * @param buffer writable buffer of/for UTF-16 text 3402 * @param buffLength length of the current buffer contents 3403 * @param buffCapacity buffer capacity 3404 * @stable ICU 59 3405 */ 3406 UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) : 3407 UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} 3408 #endif 3409 3410 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) 3411 /** 3412 * Writable-aliasing wchar_t * constructor. 3413 * (Only defined if U_SIZEOF_WCHAR_T==2.) 3414 * Delegates to UnicodeString(const char16_t *, int32_t, int32_t). 3415 * @param buffer writable buffer of/for UTF-16 text 3416 * @param buffLength length of the current buffer contents 3417 * @param buffCapacity buffer capacity 3418 * @stable ICU 59 3419 */ 3420 UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) : 3421 UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} 3422 #endif 3423 3424 /** 3425 * Writable-aliasing nullptr_t constructor. 3426 * Effectively the same as the default constructor, makes an empty string object. 3427 * @param buffer nullptr 3428 * @param buffLength ignored 3429 * @param buffCapacity ignored 3430 * @stable ICU 59 3431 */ 3432 inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity); 3433 3434 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 3435 3436 /** 3437 * char* constructor. 3438 * Uses the default converter (and thus depends on the ICU conversion code) 3439 * unless U_CHARSET_IS_UTF8 is set to 1. 
3440 * 3441 * For ASCII (really "invariant character") strings it is more efficient to use 3442 * the constructor that takes a US_INV (for its enum EInvariant). 3443 * 3444 * Note, for string literals: 3445 * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time 3446 * length determination: 3447 * \code 3448 * UnicodeString str(u"literal"); 3449 * if (str == u"other literal") { ... } 3450 * \endcode 3451 * 3452 * It is recommended to mark this constructor "explicit" by 3453 * `-DUNISTR_FROM_STRING_EXPLICIT=explicit` 3454 * on the compiler command line or similar. 3455 * @param codepageData an array of bytes, null-terminated, 3456 * in the platform's default codepage. 3457 * @stable ICU 2.0 3458 */ 3459 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData); 3460 3461 /** 3462 * char* constructor. 3463 * Uses the default converter (and thus depends on the ICU conversion code) 3464 * unless U_CHARSET_IS_UTF8 is set to 1. 3465 * @param codepageData an array of bytes in the platform's default codepage. 3466 * @param dataLength The number of bytes in `codepageData`. 3467 * @stable ICU 2.0 3468 */ 3469 UnicodeString(const char *codepageData, int32_t dataLength); 3470 3471 #endif 3472 3473 #if !UCONFIG_NO_CONVERSION 3474 3475 /** 3476 * char* constructor. 3477 * @param codepageData an array of bytes, null-terminated 3478 * @param codepage the encoding of `codepageData`. The special 3479 * value 0 for `codepage` indicates that the text is in the 3480 * platform's default codepage. 3481 * 3482 * If `codepage` is an empty string (`""`), 3483 * then a simple conversion is performed on the codepage-invariant 3484 * subset ("invariant characters") of the platform encoding. See utypes.h. 3485 * Recommendation: For invariant-character strings use the constructor 3486 * UnicodeString(const char *src, int32_t length, enum EInvariant inv) 3487 * because it avoids object code dependencies of UnicodeString on 3488 * the conversion code. 
3489 * 3490 * @stable ICU 2.0 3491 */ 3492 UnicodeString(const char *codepageData, const char *codepage); 3493 3494 /** 3495 * char* constructor. 3496 * @param codepageData an array of bytes. 3497 * @param dataLength The number of bytes in `codepageData`. 3498 * @param codepage the encoding of `codepageData`. The special 3499 * value 0 for `codepage` indicates that the text is in the 3500 * platform's default codepage. 3501 * If `codepage` is an empty string (`""`), 3502 * then a simple conversion is performed on the codepage-invariant 3503 * subset ("invariant characters") of the platform encoding. See utypes.h. 3504 * Recommendation: For invariant-character strings use the constructor 3505 * UnicodeString(const char *src, int32_t length, enum EInvariant inv) 3506 * because it avoids object code dependencies of UnicodeString on 3507 * the conversion code. 3508 * 3509 * @stable ICU 2.0 3510 */ 3511 UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage); 3512 3513 /** 3514 * char * / UConverter constructor. 3515 * This constructor uses an existing UConverter object to 3516 * convert the codepage string to Unicode and construct a UnicodeString 3517 * from that. 3518 * 3519 * The converter is reset at first. 3520 * If the error code indicates a failure before this constructor is called, 3521 * or if an error occurs during conversion or construction, 3522 * then the string will be bogus. 3523 * 3524 * This function avoids the overhead of opening and closing a converter if 3525 * multiple strings are constructed. 
3526 * 3527 * @param src input codepage string 3528 * @param srcLength length of the input string, can be -1 for NUL-terminated strings 3529 * @param cnv converter object (ucnv_resetToUnicode() will be called), 3530 * can be nullptr for the default converter 3531 * @param errorCode normal ICU error code 3532 * @stable ICU 2.0 3533 */ 3534 UnicodeString( 3535 const char *src, int32_t srcLength, 3536 UConverter *cnv, 3537 UErrorCode &errorCode); 3538 3539 #endif 3540 3541 /** 3542 * Constructs a Unicode string from an invariant-character char * string. 3543 * About invariant characters see utypes.h. 3544 * This constructor has no runtime dependency on conversion code and is 3545 * therefore recommended over ones taking a charset name string 3546 * (where the empty string "" indicates invariant-character conversion). 3547 * 3548 * Use the macro US_INV as the third, signature-distinguishing parameter. 3549 * 3550 * For example: 3551 * \code 3552 * void fn(const char *s) { 3553 * UnicodeString ustr(s, -1, US_INV); 3554 * // use ustr ... 3555 * } 3556 * \endcode 3557 * 3558 * Note, for string literals: 3559 * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time 3560 * length determination: 3561 * \code 3562 * UnicodeString str(u"literal"); 3563 * if (str == u"other literal") { ... } 3564 * \endcode 3565 * 3566 * @param src String using only invariant characters. 3567 * @param textLength Length of src, or -1 if NUL-terminated. 3568 * @param inv Signature-distinguishing parameter, use US_INV. 3569 * 3570 * @see US_INV 3571 * @stable ICU 3.2 3572 */ 3573 UnicodeString(const char *src, int32_t textLength, enum EInvariant inv); 3574 3575 3576 /** 3577 * Copy constructor. 3578 * 3579 * Starting with ICU 2.4, the assignment operator and the copy constructor 3580 * allocate a new buffer and copy the buffer contents even for readonly aliases. 
3581 * By contrast, the fastCopyFrom() function implements the old, 3582 * more efficient but less safe behavior 3583 * of making this string also a readonly alias to the same buffer. 3584 * 3585 * If the source object has an "open" buffer from getBuffer(minCapacity), 3586 * then the copy is an empty string. 3587 * 3588 * @param that The UnicodeString object to copy. 3589 * @stable ICU 2.0 3590 * @see fastCopyFrom 3591 */ 3592 UnicodeString(const UnicodeString& that); 3593 3594 /** 3595 * Move constructor; might leave src in bogus state. 3596 * This string will have the same contents and state that the source string had. 3597 * @param src source string 3598 * @stable ICU 56 3599 */ 3600 UnicodeString(UnicodeString &&src) noexcept; 3601 3602 /** 3603 * 'Substring' constructor from tail of source string. 3604 * @param src The UnicodeString object to copy. 3605 * @param srcStart The offset into `src` at which to start copying. 3606 * @stable ICU 2.2 3607 */ 3608 UnicodeString(const UnicodeString& src, int32_t srcStart); 3609 3610 /** 3611 * 'Substring' constructor from subrange of source string. 3612 * @param src The UnicodeString object to copy. 3613 * @param srcStart The offset into `src` at which to start copying. 3614 * @param srcLength The number of characters from `src` to copy. 3615 * @stable ICU 2.2 3616 */ 3617 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength); 3618 3619 /** 3620 * Clone this object, an instance of a subclass of Replaceable. 3621 * Clones can be used concurrently in multiple threads. 3622 * If a subclass does not implement clone(), or if an error occurs, 3623 * then nullptr is returned. 3624 * The caller must delete the clone. 3625 * 3626 * @return a clone of this object 3627 * 3628 * @see Replaceable::clone 3629 * @see getDynamicClassID 3630 * @stable ICU 2.6 3631 */ 3632 virtual UnicodeString *clone() const override; 3633 3634 /** Destructor. 
3635 * @stable ICU 2.0 3636 */ 3637 virtual ~UnicodeString(); 3638 3639 /** 3640 * Readonly-aliasing factory method. 3641 * Aliases the same buffer as the input `text` 3642 * which is, or which is implicitly convertible to, 3643 * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view. 3644 * The string is bogus if the string view is too long. 3645 * 3646 * The text will be used for the UnicodeString object, but 3647 * it will not be released when the UnicodeString is destroyed. 3648 * This has copy-on-write semantics: 3649 * When the string is modified, then the buffer is first copied into 3650 * newly allocated memory. 3651 * The aliased buffer is never modified. 3652 * 3653 * In an assignment to another UnicodeString, when using the copy constructor 3654 * or the assignment operator, the text will be copied. 3655 * When using fastCopyFrom(), the text will be aliased again, 3656 * so that both strings then alias the same readonly-text. 3657 * 3658 * @param text The string view to alias for the UnicodeString. 3659 * @stable ICU 76 3660 */ 3661 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>> 3662 static inline UnicodeString readOnlyAlias(const S &text) { 3663 return readOnlyAliasFromU16StringView(internal::toU16StringView(text)); 3664 } 3665 3666 /** 3667 * Readonly-aliasing factory method. 3668 * Aliases the same buffer as the input `text`. 3669 * 3670 * The text will be used for the UnicodeString object, but 3671 * it will not be released when the UnicodeString is destroyed. 3672 * This has copy-on-write semantics: 3673 * When the string is modified, then the buffer is first copied into 3674 * newly allocated memory. 3675 * The aliased buffer is never modified. 3676 * 3677 * In an assignment to another UnicodeString, when using the copy constructor 3678 * or the assignment operator, the text will be copied. 
3679 * When using fastCopyFrom(), the text will be aliased again, 3680 * so that both strings then alias the same readonly-text. 3681 * 3682 * @param text The UnicodeString to alias. 3683 * @stable ICU 76 3684 */ 3685 static inline UnicodeString readOnlyAlias(const UnicodeString &text) { 3686 return readOnlyAliasFromUnicodeString(text); 3687 } 3688 3689 /** 3690 * Create a UnicodeString from a UTF-8 string. 3691 * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. 3692 * Calls u_strFromUTF8WithSub(). 3693 * 3694 * @param utf8 UTF-8 input string. 3695 * Note that a StringPiece can be implicitly constructed 3696 * from a std::string or a NUL-terminated const char * string. 3697 * @return A UnicodeString with equivalent UTF-16 contents. 3698 * @see toUTF8 3699 * @see toUTF8String 3700 * @stable ICU 4.2 3701 */ 3702 static UnicodeString fromUTF8(StringPiece utf8); 3703 3704 /** 3705 * Create a UnicodeString from a UTF-32 string. 3706 * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. 3707 * Calls u_strFromUTF32WithSub(). 3708 * 3709 * @param utf32 UTF-32 input string. Must not be nullptr. 3710 * @param length Length of the input string, or -1 if NUL-terminated. 3711 * @return A UnicodeString with equivalent UTF-16 contents. 3712 * @see toUTF32 3713 * @stable ICU 4.2 3714 */ 3715 static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length); 3716 3717 /* Miscellaneous operations */ 3718 3719 /** 3720 * Unescape a string of characters and return a string containing 3721 * the result. 
The following escape sequences are recognized: 3722 * 3723 * \\uhhhh 4 hex digits; h in [0-9A-Fa-f] 3724 * \\Uhhhhhhhh 8 hex digits 3725 * \\xhh 1-2 hex digits 3726 * \\ooo 1-3 octal digits; o in [0-7] 3727 * \\cX control-X; X is masked with 0x1F 3728 * 3729 * as well as the standard ANSI C escapes: 3730 * 3731 * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, 3732 * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, 3733 * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C 3734 * 3735 * Anything else following a backslash is generically escaped. For 3736 * example, "[a\\-z]" returns "[a-z]". 3737 * 3738 * If an escape sequence is ill-formed, this method returns an empty 3739 * string. An example of an ill-formed sequence is "\\u" followed by 3740 * fewer than 4 hex digits. 3741 * 3742 * This function is similar to u_unescape() but not identical to it. 3743 * The latter takes a source char*, so it does escape recognition 3744 * and also invariant conversion. 3745 * 3746 * @return a string with backslash escapes interpreted, or an 3747 * empty string on error. 3748 * @see UnicodeString#unescapeAt() 3749 * @see u_unescape() 3750 * @see u_unescapeAt() 3751 * @stable ICU 2.0 3752 */ 3753 UnicodeString unescape() const; 3754 3755 /** 3756 * Unescape a single escape sequence and return the represented 3757 * character. See unescape() for a listing of the recognized escape 3758 * sequences. The character at offset-1 is assumed (without 3759 * checking) to be a backslash. If the escape sequence is 3760 * ill-formed, or the offset is out of range, U_SENTINEL=-1 is 3761 * returned. 3762 * 3763 * @param offset an input output parameter. On input, it is the 3764 * offset into this string where the escape sequence is located, 3765 * after the initial backslash. On output, it is advanced after the 3766 * last character parsed. On error, it is not advanced at all. 
3767 * @return the character represented by the escape sequence at 3768 * offset, or U_SENTINEL=-1 on error. 3769 * @see UnicodeString#unescape() 3770 * @see u_unescape() 3771 * @see u_unescapeAt() 3772 * @stable ICU 2.0 3773 */ 3774 UChar32 unescapeAt(int32_t &offset) const; 3775 3776 /** 3777 * ICU "poor man's RTTI", returns a UClassID for this class. 3778 * 3779 * @stable ICU 2.2 3780 */ 3781 static UClassID U_EXPORT2 getStaticClassID(); 3782 3783 /** 3784 * ICU "poor man's RTTI", returns a UClassID for the actual class. 3785 * 3786 * @stable ICU 2.2 3787 */ 3788 virtual UClassID getDynamicClassID() const override; 3789 3790 //======================================== 3791 // Implementation methods 3792 //======================================== 3793 3794 protected: 3795 /** 3796 * Implement Replaceable::getLength() (see jitterbug 1027). 3797 * @stable ICU 2.4 3798 */ 3799 virtual int32_t getLength() const override; 3800 3801 /** 3802 * The change in Replaceable to use virtual getCharAt() allows 3803 * UnicodeString::charAt() to be inline again (see jitterbug 709). 3804 * @stable ICU 2.4 3805 */ 3806 virtual char16_t getCharAt(int32_t offset) const override; 3807 3808 /** 3809 * The change in Replaceable to use virtual getChar32At() allows 3810 * UnicodeString::char32At() to be inline again (see jitterbug 709). 3811 * @stable ICU 2.4 3812 */ 3813 virtual UChar32 getChar32At(int32_t offset) const override; 3814 3815 private: 3816 static UnicodeString readOnlyAliasFromU16StringView(std::u16string_view text); 3817 static UnicodeString readOnlyAliasFromUnicodeString(const UnicodeString &text); 3818 3819 // For char* constructors. Could be made public. 3820 UnicodeString &setToUTF8(StringPiece utf8); 3821 // For extract(char*). 
3822 // We could make a toUTF8(target, capacity, errorCode) public but not 3823 // this version: New API will be cleaner if we make callers create substrings 3824 // rather than having start+length on every method, 3825 // and it should take a UErrorCode&. 3826 int32_t 3827 toUTF8(int32_t start, int32_t len, 3828 char *target, int32_t capacity) const; 3829 3830 /** 3831 * Internal string contents comparison, called by operator==. 3832 * Requires: this & text not bogus and have same lengths. 3833 */ 3834 inline UBool doEquals(const UnicodeString &text, int32_t len) const { 3835 return doEquals(text.getArrayStart(), len); 3836 } 3837 UBool doEquals(const char16_t *text, int32_t len) const; 3838 3839 inline UBool 3840 doEqualsSubstring(int32_t start, 3841 int32_t length, 3842 const UnicodeString& srcText, 3843 int32_t srcStart, 3844 int32_t srcLength) const; 3845 3846 UBool doEqualsSubstring(int32_t start, 3847 int32_t length, 3848 const char16_t *srcChars, 3849 int32_t srcStart, 3850 int32_t srcLength) const; 3851 3852 inline int8_t 3853 doCompare(int32_t start, 3854 int32_t length, 3855 const UnicodeString& srcText, 3856 int32_t srcStart, 3857 int32_t srcLength) const; 3858 3859 int8_t doCompare(int32_t start, 3860 int32_t length, 3861 const char16_t *srcChars, 3862 int32_t srcStart, 3863 int32_t srcLength) const; 3864 3865 inline int8_t 3866 doCompareCodePointOrder(int32_t start, 3867 int32_t length, 3868 const UnicodeString& srcText, 3869 int32_t srcStart, 3870 int32_t srcLength) const; 3871 3872 int8_t doCompareCodePointOrder(int32_t start, 3873 int32_t length, 3874 const char16_t *srcChars, 3875 int32_t srcStart, 3876 int32_t srcLength) const; 3877 3878 inline int8_t 3879 doCaseCompare(int32_t start, 3880 int32_t length, 3881 const UnicodeString &srcText, 3882 int32_t srcStart, 3883 int32_t srcLength, 3884 uint32_t options) const; 3885 3886 int8_t 3887 doCaseCompare(int32_t start, 3888 int32_t length, 3889 const char16_t *srcChars, 3890 int32_t srcStart, 3891 
int32_t srcLength, 3892 uint32_t options) const; 3893 3894 int32_t doIndexOf(char16_t c, 3895 int32_t start, 3896 int32_t length) const; 3897 3898 int32_t doIndexOf(UChar32 c, 3899 int32_t start, 3900 int32_t length) const; 3901 3902 int32_t doLastIndexOf(char16_t c, 3903 int32_t start, 3904 int32_t length) const; 3905 3906 int32_t doLastIndexOf(UChar32 c, 3907 int32_t start, 3908 int32_t length) const; 3909 3910 void doExtract(int32_t start, 3911 int32_t length, 3912 char16_t *dst, 3913 int32_t dstStart) const; 3914 3915 inline void doExtract(int32_t start, 3916 int32_t length, 3917 UnicodeString& target) const; 3918 3919 inline char16_t doCharAt(int32_t offset) const; 3920 3921 UnicodeString& doReplace(int32_t start, 3922 int32_t length, 3923 const UnicodeString& srcText, 3924 int32_t srcStart, 3925 int32_t srcLength); 3926 3927 UnicodeString& doReplace(int32_t start, 3928 int32_t length, 3929 const char16_t *srcChars, 3930 int32_t srcStart, 3931 int32_t srcLength); 3932 UnicodeString& doReplace(int32_t start, int32_t length, std::u16string_view src); 3933 3934 UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength); 3935 UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength); 3936 UnicodeString& doAppend(std::u16string_view src); 3937 3938 UnicodeString& doReverse(int32_t start, 3939 int32_t length); 3940 3941 // calculate hash code 3942 int32_t doHashCode() const; 3943 3944 // get pointer to start of array 3945 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function 3946 inline char16_t* getArrayStart(); 3947 inline const char16_t* getArrayStart() const; 3948 3949 inline UBool hasShortLength() const; 3950 inline int32_t getShortLength() const; 3951 3952 // A UnicodeString object (not necessarily its current buffer) 3953 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity). 
3954 inline UBool isWritable() const; 3955 3956 // Is the current buffer writable? 3957 inline UBool isBufferWritable() const; 3958 3959 // None of the following does releaseArray(). 3960 inline void setZeroLength(); 3961 inline void setShortLength(int32_t len); 3962 inline void setLength(int32_t len); 3963 inline void setToEmpty(); 3964 inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags 3965 3966 // allocate the array; result may be the stack buffer 3967 // sets refCount to 1 if appropriate 3968 // sets fArray, fCapacity, and flags 3969 // sets length to 0 3970 // returns boolean for success or failure 3971 UBool allocate(int32_t capacity); 3972 3973 // release the array if owned 3974 void releaseArray(); 3975 3976 // turn a bogus string into an empty one 3977 void unBogus(); 3978 3979 // implements assignment operator, copy constructor, and fastCopyFrom() 3980 UnicodeString ©From(const UnicodeString &src, UBool fastCopy=false); 3981 3982 // Copies just the fields without memory management. 3983 void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) noexcept; 3984 3985 // Pin start and limit to acceptable values. 3986 inline void pinIndex(int32_t& start) const; 3987 inline void pinIndices(int32_t& start, 3988 int32_t& length) const; 3989 3990 #if !UCONFIG_NO_CONVERSION 3991 3992 /* Internal extract() using UConverter. */ 3993 int32_t doExtract(int32_t start, int32_t length, 3994 char *dest, int32_t destCapacity, 3995 UConverter *cnv, 3996 UErrorCode &errorCode) const; 3997 3998 /* 3999 * Real constructor for converting from codepage data. 4000 * It assumes that it is called with !fRefCounted. 4001 * 4002 * If `codepage==0`, then the default converter 4003 * is used for the platform encoding. 4004 * If `codepage` is an empty string (`""`), 4005 * then a simple conversion is performed on the codepage-invariant 4006 * subset ("invariant characters") of the platform encoding. See utypes.h. 
4007 */ 4008 void doCodepageCreate(const char *codepageData, 4009 int32_t dataLength, 4010 const char *codepage); 4011 4012 /* 4013 * Worker function for creating a UnicodeString from 4014 * a codepage string using a UConverter. 4015 */ 4016 void 4017 doCodepageCreate(const char *codepageData, 4018 int32_t dataLength, 4019 UConverter *converter, 4020 UErrorCode &status); 4021 4022 #endif 4023 4024 /* 4025 * This function is called when write access to the array 4026 * is necessary. 4027 * 4028 * We need to make a copy of the array if 4029 * the buffer is read-only, or 4030 * the buffer is refCounted (shared), and refCount>1, or 4031 * the buffer is too small. 4032 * 4033 * Return false if memory could not be allocated. 4034 */ 4035 UBool cloneArrayIfNeeded(int32_t newCapacity = -1, 4036 int32_t growCapacity = -1, 4037 UBool doCopyArray = true, 4038 int32_t** pBufferToDelete = nullptr, 4039 UBool forceClone = false); 4040 4041 /** 4042 * Common function for UnicodeString case mappings. 4043 * The stringCaseMapper has the same type UStringCaseMapper 4044 * as in ustr_imp.h for ustrcase_map(). 4045 */ 4046 UnicodeString & 4047 caseMap(int32_t caseLocale, uint32_t options, 4048 #if !UCONFIG_NO_BREAK_ITERATION 4049 BreakIterator *iter, 4050 #endif 4051 UStringCaseMapper *stringCaseMapper); 4052 4053 // ref counting 4054 void addRef(); 4055 int32_t removeRef(); 4056 int32_t refCount() const; 4057 4058 // constants 4059 enum { 4060 /** 4061 * Size of stack buffer for short strings. 4062 * Must be at least U16_MAX_LENGTH for the single-code point constructor to work. 
4063 * @see UNISTR_OBJECT_SIZE 4064 */ 4065 US_STACKBUF_SIZE = static_cast<int32_t>(UNISTR_OBJECT_SIZE - sizeof(void*) - 2) / U_SIZEOF_UCHAR, 4066 kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index) 4067 kInvalidHashCode=0, // invalid hash code 4068 kEmptyHashCode=1, // hash code for empty string 4069 4070 // bit flag values for fLengthAndFlags 4071 kIsBogus=1, // this string is bogus, i.e., not valid or nullptr 4072 kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields 4073 kRefCounted=4, // there is a refCount field before the characters in fArray 4074 kBufferIsReadonly=8,// do not write to this buffer 4075 kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"), 4076 // and releaseBuffer(newLength) must be called 4077 kAllStorageFlags=0x1f, 4078 4079 kLengthShift=5, // remaining 11 bits for non-negative short length, or negative if long 4080 kLength1=1<<kLengthShift, 4081 kMaxShortLength=0x3ff, // max non-negative short length (leaves top bit 0) 4082 kLengthIsLarge=0xffe0, // short length < 0, real length is in fUnion.fFields.fLength 4083 4084 // combined values for convenience 4085 kShortString=kUsingStackBuffer, 4086 kLongString=kRefCounted, 4087 kReadonlyAlias=kBufferIsReadonly, 4088 kWritableAlias=0 4089 }; 4090 4091 friend class UnicodeStringAppendable; 4092 4093 union StackBufferOrFields; // forward declaration necessary before friend declaration 4094 friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion 4095 4096 /* 4097 * The following are all the class fields that are stored 4098 * in each UnicodeString object. 4099 * Note that UnicodeString has virtual functions, 4100 * therefore there is an implicit vtable pointer 4101 * as the first real field. 4102 * The fields should be aligned such that no padding is necessary. 4103 * On 32-bit machines, the size should be 32 bytes, 4104 * on 64-bit machines (8-byte pointers), it should be 40 bytes. 
4105 * 4106 * We use a hack to achieve this. 4107 * 4108 * With at least some compilers, each of the following is forced to 4109 * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer], 4110 * rounded up with additional padding if the fields do not already fit that requirement: 4111 * - sizeof(class UnicodeString) 4112 * - offsetof(UnicodeString, fUnion) 4113 * - sizeof(fUnion) 4114 * - sizeof(fStackFields) 4115 * 4116 * We optimize for the longest possible internal buffer for short strings. 4117 * fUnion.fStackFields begins with 2 bytes for storage flags 4118 * and the length of relatively short strings, 4119 * followed by the buffer for short string contents. 4120 * There is no padding inside fStackFields. 4121 * 4122 * Heap-allocated and aliased strings use fUnion.fFields. 4123 * Both fStackFields and fFields must begin with the same fields for flags and short length, 4124 * that is, those must have the same memory offsets inside the object, 4125 * because the flags must be inspected in order to decide which half of fUnion is being used. 4126 * We assume that the compiler does not reorder the fields. 4127 * 4128 * (Padding at the end of fFields is ok: 4129 * As long as it is no larger than fStackFields, it is not wasted space.) 4130 * 4131 * For some of the history of the UnicodeString class fields layout, see 4132 * - ICU ticket #11551 "longer UnicodeString contents in stack buffer" 4133 * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays" 4134 * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?" 4135 */ 4136 // (implicit) *vtable; 4137 union StackBufferOrFields { 4138 // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used. 4139 // Each struct of the union must begin with fLengthAndFlags. 
4140 struct { 4141 int16_t fLengthAndFlags; // bit fields: see constants above 4142 char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings 4143 } fStackFields; 4144 struct { 4145 int16_t fLengthAndFlags; // bit fields: see constants above 4146 int32_t fLength; // number of characters in fArray if >127; else undefined 4147 int32_t fCapacity; // capacity of fArray (in char16_ts) 4148 // array pointer last to minimize padding for machines with P128 data model 4149 // or pointer sizes that are not a power of 2 4150 char16_t *fArray; // the Unicode data 4151 } fFields; 4152 } fUnion; 4153 }; 4154 4155 /** 4156 * Creates a new UnicodeString from the concatenation of two others. 4157 * 4158 * @param s1 The first string to be copied to the new one. 4159 * @param s2 The second string to be copied to the new one, after s1. 4160 * @return UnicodeString(s1).append(s2) 4161 * @stable ICU 2.8 4162 */ 4163 U_COMMON_API UnicodeString U_EXPORT2 4164 operator+ (const UnicodeString &s1, const UnicodeString &s2); 4165 4166 /** 4167 * Creates a new UnicodeString from the concatenation of a UnicodeString and `s2` 4168 * which is, or which is implicitly convertible to, 4169 * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view. 4170 * 4171 * @param s1 The string to be copied to the new one. 4172 * @param s2 The string view to be copied to the new string, after s1. 
4173 * @return UnicodeString(s1).append(s2) 4174 * @stable ICU 76 4175 */ 4176 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>> 4177 inline UnicodeString operator+(const UnicodeString &s1, const S &s2) { 4178 return unistr_internalConcat(s1, internal::toU16StringView(s2)); 4179 } 4180 4181 #ifndef U_FORCE_HIDE_INTERNAL_API 4182 /** @internal */ 4183 U_COMMON_API UnicodeString U_EXPORT2 4184 unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2); 4185 #endif 4186 4187 //======================================== 4188 // Inline members 4189 //======================================== 4190 4191 //======================================== 4192 // Privates 4193 //======================================== 4194 4195 inline void 4196 UnicodeString::pinIndex(int32_t& start) const 4197 { 4198 // pin index 4199 if(start < 0) { 4200 start = 0; 4201 } else if(start > length()) { 4202 start = length(); 4203 } 4204 } 4205 4206 inline void 4207 UnicodeString::pinIndices(int32_t& start, 4208 int32_t& _length) const 4209 { 4210 // pin indices 4211 int32_t len = length(); 4212 if(start < 0) { 4213 start = 0; 4214 } else if(start > len) { 4215 start = len; 4216 } 4217 if(_length < 0) { 4218 _length = 0; 4219 } else if(_length > (len - start)) { 4220 _length = (len - start); 4221 } 4222 } 4223 4224 inline char16_t* 4225 UnicodeString::getArrayStart() { 4226 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? 4227 fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; 4228 } 4229 4230 inline const char16_t* 4231 UnicodeString::getArrayStart() const { 4232 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? 
4233 fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; 4234 } 4235 4236 //======================================== 4237 // Default constructor 4238 //======================================== 4239 4240 inline 4241 UnicodeString::UnicodeString() { 4242 fUnion.fStackFields.fLengthAndFlags=kShortString; 4243 } 4244 4245 inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) { 4246 fUnion.fStackFields.fLengthAndFlags=kShortString; 4247 } 4248 4249 inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) { 4250 fUnion.fStackFields.fLengthAndFlags=kShortString; 4251 } 4252 4253 inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) { 4254 fUnion.fStackFields.fLengthAndFlags=kShortString; 4255 } 4256 4257 //======================================== 4258 // Read-only implementation methods 4259 //======================================== 4260 inline UBool 4261 UnicodeString::hasShortLength() const { 4262 return fUnion.fFields.fLengthAndFlags>=0; 4263 } 4264 4265 inline int32_t 4266 UnicodeString::getShortLength() const { 4267 // fLengthAndFlags must be non-negative -> short length >= 0 4268 // and arithmetic or logical shift does not matter. 4269 return fUnion.fFields.fLengthAndFlags>>kLengthShift; 4270 } 4271 4272 inline int32_t 4273 UnicodeString::length() const { 4274 return hasShortLength() ? getShortLength() : fUnion.fFields.fLength; 4275 } 4276 4277 inline int32_t 4278 UnicodeString::getCapacity() const { 4279 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? 
4280 US_STACKBUF_SIZE : fUnion.fFields.fCapacity; 4281 } 4282 4283 inline int32_t 4284 UnicodeString::hashCode() const 4285 { return doHashCode(); } 4286 4287 inline UBool 4288 UnicodeString::isBogus() const 4289 { return fUnion.fFields.fLengthAndFlags & kIsBogus; } 4290 4291 inline UBool 4292 UnicodeString::isWritable() const 4293 { return !(fUnion.fFields.fLengthAndFlags & (kOpenGetBuffer | kIsBogus)); } 4294 4295 inline UBool 4296 UnicodeString::isBufferWritable() const 4297 { 4298 return 4299 !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && 4300 (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1); 4301 } 4302 4303 inline const char16_t * 4304 UnicodeString::getBuffer() const { 4305 if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) { 4306 return nullptr; 4307 } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) { 4308 return fUnion.fStackFields.fBuffer; 4309 } else { 4310 return fUnion.fFields.fArray; 4311 } 4312 } 4313 4314 //======================================== 4315 // Read-only alias methods 4316 //======================================== 4317 inline int8_t 4318 UnicodeString::doCompare(int32_t start, 4319 int32_t thisLength, 4320 const UnicodeString& srcText, 4321 int32_t srcStart, 4322 int32_t srcLength) const 4323 { 4324 if(srcText.isBogus()) { 4325 return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise 4326 } else { 4327 srcText.pinIndices(srcStart, srcLength); 4328 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 4329 } 4330 } 4331 4332 inline UBool 4333 UnicodeString::doEqualsSubstring(int32_t start, 4334 int32_t thisLength, 4335 const UnicodeString& srcText, 4336 int32_t srcStart, 4337 int32_t srcLength) const 4338 { 4339 if(srcText.isBogus()) { 4340 return isBogus(); 4341 } else { 4342 srcText.pinIndices(srcStart, srcLength); 4343 return !isBogus() && doEqualsSubstring(start, thisLength, srcText.getArrayStart(), srcStart, 
srcLength); 4344 } 4345 } 4346 4347 inline bool 4348 UnicodeString::operator== (const UnicodeString& text) const 4349 { 4350 if(isBogus()) { 4351 return text.isBogus(); 4352 } else { 4353 int32_t len = length(), textLength = text.length(); 4354 return !text.isBogus() && len == textLength && doEquals(text, len); 4355 } 4356 } 4357 4358 inline bool 4359 UnicodeString::operator!= (const UnicodeString& text) const 4360 { return (! operator==(text)); } 4361 4362 inline UBool 4363 UnicodeString::operator> (const UnicodeString& text) const 4364 { return doCompare(0, length(), text, 0, text.length()) == 1; } 4365 4366 inline UBool 4367 UnicodeString::operator< (const UnicodeString& text) const 4368 { return doCompare(0, length(), text, 0, text.length()) == -1; } 4369 4370 inline UBool 4371 UnicodeString::operator>= (const UnicodeString& text) const 4372 { return doCompare(0, length(), text, 0, text.length()) != -1; } 4373 4374 inline UBool 4375 UnicodeString::operator<= (const UnicodeString& text) const 4376 { return doCompare(0, length(), text, 0, text.length()) != 1; } 4377 4378 inline int8_t 4379 UnicodeString::compare(const UnicodeString& text) const 4380 { return doCompare(0, length(), text, 0, text.length()); } 4381 4382 inline int8_t 4383 UnicodeString::compare(int32_t start, 4384 int32_t _length, 4385 const UnicodeString& srcText) const 4386 { return doCompare(start, _length, srcText, 0, srcText.length()); } 4387 4388 inline int8_t 4389 UnicodeString::compare(ConstChar16Ptr srcChars, 4390 int32_t srcLength) const 4391 { return doCompare(0, length(), srcChars, 0, srcLength); } 4392 4393 inline int8_t 4394 UnicodeString::compare(int32_t start, 4395 int32_t _length, 4396 const UnicodeString& srcText, 4397 int32_t srcStart, 4398 int32_t srcLength) const 4399 { return doCompare(start, _length, srcText, srcStart, srcLength); } 4400 4401 inline int8_t 4402 UnicodeString::compare(int32_t start, 4403 int32_t _length, 4404 const char16_t *srcChars) const 4405 { return 
doCompare(start, _length, srcChars, 0, _length); } 4406 4407 inline int8_t 4408 UnicodeString::compare(int32_t start, 4409 int32_t _length, 4410 const char16_t *srcChars, 4411 int32_t srcStart, 4412 int32_t srcLength) const 4413 { return doCompare(start, _length, srcChars, srcStart, srcLength); } 4414 4415 inline int8_t 4416 UnicodeString::compareBetween(int32_t start, 4417 int32_t limit, 4418 const UnicodeString& srcText, 4419 int32_t srcStart, 4420 int32_t srcLimit) const 4421 { return doCompare(start, limit - start, 4422 srcText, srcStart, srcLimit - srcStart); } 4423 4424 inline int8_t 4425 UnicodeString::doCompareCodePointOrder(int32_t start, 4426 int32_t thisLength, 4427 const UnicodeString& srcText, 4428 int32_t srcStart, 4429 int32_t srcLength) const 4430 { 4431 if(srcText.isBogus()) { 4432 return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise 4433 } else { 4434 srcText.pinIndices(srcStart, srcLength); 4435 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 4436 } 4437 } 4438 4439 inline int8_t 4440 UnicodeString::compareCodePointOrder(const UnicodeString& text) const 4441 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); } 4442 4443 inline int8_t 4444 UnicodeString::compareCodePointOrder(int32_t start, 4445 int32_t _length, 4446 const UnicodeString& srcText) const 4447 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } 4448 4449 inline int8_t 4450 UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars, 4451 int32_t srcLength) const 4452 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } 4453 4454 inline int8_t 4455 UnicodeString::compareCodePointOrder(int32_t start, 4456 int32_t _length, 4457 const UnicodeString& srcText, 4458 int32_t srcStart, 4459 int32_t srcLength) const 4460 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); } 4461 4462 inline int8_t 4463 
UnicodeString::compareCodePointOrder(int32_t start, 4464 int32_t _length, 4465 const char16_t *srcChars) const 4466 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } 4467 4468 inline int8_t 4469 UnicodeString::compareCodePointOrder(int32_t start, 4470 int32_t _length, 4471 const char16_t *srcChars, 4472 int32_t srcStart, 4473 int32_t srcLength) const 4474 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } 4475 4476 inline int8_t 4477 UnicodeString::compareCodePointOrderBetween(int32_t start, 4478 int32_t limit, 4479 const UnicodeString& srcText, 4480 int32_t srcStart, 4481 int32_t srcLimit) const 4482 { return doCompareCodePointOrder(start, limit - start, 4483 srcText, srcStart, srcLimit - srcStart); } 4484 4485 inline int8_t 4486 UnicodeString::doCaseCompare(int32_t start, 4487 int32_t thisLength, 4488 const UnicodeString &srcText, 4489 int32_t srcStart, 4490 int32_t srcLength, 4491 uint32_t options) const 4492 { 4493 if(srcText.isBogus()) { 4494 return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise 4495 } else { 4496 srcText.pinIndices(srcStart, srcLength); 4497 return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options); 4498 } 4499 } 4500 4501 inline int8_t 4502 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const { 4503 return doCaseCompare(0, length(), text, 0, text.length(), options); 4504 } 4505 4506 inline int8_t 4507 UnicodeString::caseCompare(int32_t start, 4508 int32_t _length, 4509 const UnicodeString &srcText, 4510 uint32_t options) const { 4511 return doCaseCompare(start, _length, srcText, 0, srcText.length(), options); 4512 } 4513 4514 inline int8_t 4515 UnicodeString::caseCompare(ConstChar16Ptr srcChars, 4516 int32_t srcLength, 4517 uint32_t options) const { 4518 return doCaseCompare(0, length(), srcChars, 0, srcLength, options); 4519 } 4520 4521 inline int8_t 4522 UnicodeString::caseCompare(int32_t start, 4523 
int32_t _length, 4524 const UnicodeString &srcText, 4525 int32_t srcStart, 4526 int32_t srcLength, 4527 uint32_t options) const { 4528 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options); 4529 } 4530 4531 inline int8_t 4532 UnicodeString::caseCompare(int32_t start, 4533 int32_t _length, 4534 const char16_t *srcChars, 4535 uint32_t options) const { 4536 return doCaseCompare(start, _length, srcChars, 0, _length, options); 4537 } 4538 4539 inline int8_t 4540 UnicodeString::caseCompare(int32_t start, 4541 int32_t _length, 4542 const char16_t *srcChars, 4543 int32_t srcStart, 4544 int32_t srcLength, 4545 uint32_t options) const { 4546 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); 4547 } 4548 4549 inline int8_t 4550 UnicodeString::caseCompareBetween(int32_t start, 4551 int32_t limit, 4552 const UnicodeString &srcText, 4553 int32_t srcStart, 4554 int32_t srcLimit, 4555 uint32_t options) const { 4556 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options); 4557 } 4558 4559 inline int32_t 4560 UnicodeString::indexOf(const UnicodeString& srcText, 4561 int32_t srcStart, 4562 int32_t srcLength, 4563 int32_t start, 4564 int32_t _length) const 4565 { 4566 if(!srcText.isBogus()) { 4567 srcText.pinIndices(srcStart, srcLength); 4568 if(srcLength > 0) { 4569 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 4570 } 4571 } 4572 return -1; 4573 } 4574 4575 inline int32_t 4576 UnicodeString::indexOf(const UnicodeString& text) const 4577 { return indexOf(text, 0, text.length(), 0, length()); } 4578 4579 inline int32_t 4580 UnicodeString::indexOf(const UnicodeString& text, 4581 int32_t start) const { 4582 pinIndex(start); 4583 return indexOf(text, 0, text.length(), start, length() - start); 4584 } 4585 4586 inline int32_t 4587 UnicodeString::indexOf(const UnicodeString& text, 4588 int32_t start, 4589 int32_t _length) const 4590 { return indexOf(text, 0, text.length(), 
start, _length); } 4591 4592 inline int32_t 4593 UnicodeString::indexOf(const char16_t *srcChars, 4594 int32_t srcLength, 4595 int32_t start) const { 4596 pinIndex(start); 4597 return indexOf(srcChars, 0, srcLength, start, length() - start); 4598 } 4599 4600 inline int32_t 4601 UnicodeString::indexOf(ConstChar16Ptr srcChars, 4602 int32_t srcLength, 4603 int32_t start, 4604 int32_t _length) const 4605 { return indexOf(srcChars, 0, srcLength, start, _length); } 4606 4607 inline int32_t 4608 UnicodeString::indexOf(char16_t c, 4609 int32_t start, 4610 int32_t _length) const 4611 { return doIndexOf(c, start, _length); } 4612 4613 inline int32_t 4614 UnicodeString::indexOf(UChar32 c, 4615 int32_t start, 4616 int32_t _length) const 4617 { return doIndexOf(c, start, _length); } 4618 4619 inline int32_t 4620 UnicodeString::indexOf(char16_t c) const 4621 { return doIndexOf(c, 0, length()); } 4622 4623 inline int32_t 4624 UnicodeString::indexOf(UChar32 c) const 4625 { return indexOf(c, 0, length()); } 4626 4627 inline int32_t 4628 UnicodeString::indexOf(char16_t c, 4629 int32_t start) const { 4630 pinIndex(start); 4631 return doIndexOf(c, start, length() - start); 4632 } 4633 4634 inline int32_t 4635 UnicodeString::indexOf(UChar32 c, 4636 int32_t start) const { 4637 pinIndex(start); 4638 return indexOf(c, start, length() - start); 4639 } 4640 4641 inline int32_t 4642 UnicodeString::lastIndexOf(ConstChar16Ptr srcChars, 4643 int32_t srcLength, 4644 int32_t start, 4645 int32_t _length) const 4646 { return lastIndexOf(srcChars, 0, srcLength, start, _length); } 4647 4648 inline int32_t 4649 UnicodeString::lastIndexOf(const char16_t *srcChars, 4650 int32_t srcLength, 4651 int32_t start) const { 4652 pinIndex(start); 4653 return lastIndexOf(srcChars, 0, srcLength, start, length() - start); 4654 } 4655 4656 inline int32_t 4657 UnicodeString::lastIndexOf(const UnicodeString& srcText, 4658 int32_t srcStart, 4659 int32_t srcLength, 4660 int32_t start, 4661 int32_t _length) const 4662 { 
4663 if(!srcText.isBogus()) { 4664 srcText.pinIndices(srcStart, srcLength); 4665 if(srcLength > 0) { 4666 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 4667 } 4668 } 4669 return -1; 4670 } 4671 4672 inline int32_t 4673 UnicodeString::lastIndexOf(const UnicodeString& text, 4674 int32_t start, 4675 int32_t _length) const 4676 { return lastIndexOf(text, 0, text.length(), start, _length); } 4677 4678 inline int32_t 4679 UnicodeString::lastIndexOf(const UnicodeString& text, 4680 int32_t start) const { 4681 pinIndex(start); 4682 return lastIndexOf(text, 0, text.length(), start, length() - start); 4683 } 4684 4685 inline int32_t 4686 UnicodeString::lastIndexOf(const UnicodeString& text) const 4687 { return lastIndexOf(text, 0, text.length(), 0, length()); } 4688 4689 inline int32_t 4690 UnicodeString::lastIndexOf(char16_t c, 4691 int32_t start, 4692 int32_t _length) const 4693 { return doLastIndexOf(c, start, _length); } 4694 4695 inline int32_t 4696 UnicodeString::lastIndexOf(UChar32 c, 4697 int32_t start, 4698 int32_t _length) const { 4699 return doLastIndexOf(c, start, _length); 4700 } 4701 4702 inline int32_t 4703 UnicodeString::lastIndexOf(char16_t c) const 4704 { return doLastIndexOf(c, 0, length()); } 4705 4706 inline int32_t 4707 UnicodeString::lastIndexOf(UChar32 c) const { 4708 return lastIndexOf(c, 0, length()); 4709 } 4710 4711 inline int32_t 4712 UnicodeString::lastIndexOf(char16_t c, 4713 int32_t start) const { 4714 pinIndex(start); 4715 return doLastIndexOf(c, start, length() - start); 4716 } 4717 4718 inline int32_t 4719 UnicodeString::lastIndexOf(UChar32 c, 4720 int32_t start) const { 4721 pinIndex(start); 4722 return lastIndexOf(c, start, length() - start); 4723 } 4724 4725 inline UBool 4726 UnicodeString::startsWith(const UnicodeString& text) const 4727 { return doEqualsSubstring(0, text.length(), text, 0, text.length()); } 4728 4729 inline UBool 4730 UnicodeString::startsWith(const UnicodeString& srcText, 4731 
int32_t srcStart, 4732 int32_t srcLength) const 4733 { return doEqualsSubstring(0, srcLength, srcText, srcStart, srcLength); } 4734 4735 inline UBool 4736 UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { 4737 if(srcLength < 0) { 4738 srcLength = u_strlen(toUCharPtr(srcChars)); 4739 } 4740 return doEqualsSubstring(0, srcLength, srcChars, 0, srcLength); 4741 } 4742 4743 inline UBool 4744 UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { 4745 if(srcLength < 0) { 4746 srcLength = u_strlen(toUCharPtr(srcChars)); 4747 } 4748 return doEqualsSubstring(0, srcLength, srcChars, srcStart, srcLength); 4749 } 4750 4751 inline UBool 4752 UnicodeString::endsWith(const UnicodeString& text) const 4753 { return doEqualsSubstring(length() - text.length(), text.length(), 4754 text, 0, text.length()); } 4755 4756 inline UBool 4757 UnicodeString::endsWith(const UnicodeString& srcText, 4758 int32_t srcStart, 4759 int32_t srcLength) const { 4760 srcText.pinIndices(srcStart, srcLength); 4761 return doEqualsSubstring(length() - srcLength, srcLength, 4762 srcText, srcStart, srcLength); 4763 } 4764 4765 inline UBool 4766 UnicodeString::endsWith(ConstChar16Ptr srcChars, 4767 int32_t srcLength) const { 4768 if(srcLength < 0) { 4769 srcLength = u_strlen(toUCharPtr(srcChars)); 4770 } 4771 return doEqualsSubstring(length() - srcLength, srcLength, srcChars, 0, srcLength); 4772 } 4773 4774 inline UBool 4775 UnicodeString::endsWith(const char16_t *srcChars, 4776 int32_t srcStart, 4777 int32_t srcLength) const { 4778 if(srcLength < 0) { 4779 srcLength = u_strlen(toUCharPtr(srcChars + srcStart)); 4780 } 4781 return doEqualsSubstring(length() - srcLength, srcLength, 4782 srcChars, srcStart, srcLength); 4783 } 4784 4785 //======================================== 4786 // replace 4787 //======================================== 4788 inline UnicodeString& 4789 UnicodeString::replace(int32_t start, 4790 int32_t _length, 4791 const 
UnicodeString& srcText)
{ return doReplace(start, _length, srcText, 0, srcText.length()); }

// Replaces a range with a range of srcText; all arguments are passed through unchanged.
inline UnicodeString &UnicodeString::replace(int32_t start,
                                             int32_t _length,
                                             const UnicodeString &srcText,
                                             int32_t srcStart,
                                             int32_t srcLength) {
  return doReplace(start, _length, srcText, srcStart, srcLength);
}

// Replaces a range with srcLength units of srcChars, taken from offset 0.
inline UnicodeString &UnicodeString::replace(int32_t start,
                                             int32_t _length,
                                             ConstChar16Ptr srcChars,
                                             int32_t srcLength) {
  return doReplace(start, _length, srcChars, 0, srcLength);
}

// Replaces a range with a range of srcChars; all arguments are passed through unchanged.
inline UnicodeString &UnicodeString::replace(int32_t start,
                                             int32_t _length,
                                             const char16_t *srcChars,
                                             int32_t srcStart,
                                             int32_t srcLength) {
  return doReplace(start, _length, srcChars, srcStart, srcLength);
}

// Replaces a range with the single code unit srcChar.
inline UnicodeString &UnicodeString::replace(int32_t start,
                                             int32_t _length,
                                             char16_t srcChar) {
  return doReplace(start, _length, &srcChar, 0, 1);
}

// Replaces [start, limit) with all of srcText.
inline UnicodeString &UnicodeString::replaceBetween(int32_t start,
                                                    int32_t limit,
                                                    const UnicodeString &srcText) {
  const int32_t span = limit - start;
  return doReplace(start, span, srcText, 0, srcText.length());
}

// Replaces [start, limit) with [srcStart, srcLimit) of srcText.
inline UnicodeString &UnicodeString::replaceBetween(int32_t start,
                                                    int32_t limit,
                                                    const UnicodeString &srcText,
                                                    int32_t srcStart,
                                                    int32_t srcLimit) {
  const int32_t span = limit - start;
  const int32_t srcSpan = srcLimit - srcStart;
  return doReplace(start, span, srcText, srcStart, srcSpan);
}

// Whole-string form: forwards full ranges of this string, oldText and newText.
inline UnicodeString &UnicodeString::findAndReplace(const UnicodeString &oldText,
                                                    const UnicodeString &newText) {
  return findAndReplace(0, length(),
                        oldText, 0, oldText.length(),
                        newText, 0, newText.length());
}

inline UnicodeString&
UnicodeString::findAndReplace(int32_t start,
                  int32_t _length,
                  const UnicodeString& oldText,
                  const UnicodeString& newText)
{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
            newText, 0,
newText.length()); } 4850 4851 // ============================ 4852 // extract 4853 // ============================ 4854 inline void 4855 UnicodeString::doExtract(int32_t start, 4856 int32_t _length, 4857 UnicodeString& target) const 4858 { target.replace(0, target.length(), *this, start, _length); } 4859 4860 inline void 4861 UnicodeString::extract(int32_t start, 4862 int32_t _length, 4863 Char16Ptr target, 4864 int32_t targetStart) const 4865 { doExtract(start, _length, target, targetStart); } 4866 4867 inline void 4868 UnicodeString::extract(int32_t start, 4869 int32_t _length, 4870 UnicodeString& target) const 4871 { doExtract(start, _length, target); } 4872 4873 #if !UCONFIG_NO_CONVERSION 4874 4875 inline int32_t 4876 UnicodeString::extract(int32_t start, 4877 int32_t _length, 4878 char *dst, 4879 const char *codepage) const 4880 4881 { 4882 // This dstSize value will be checked explicitly 4883 return extract(start, _length, dst, dst != nullptr ? 0xffffffff : 0, codepage); 4884 } 4885 4886 #endif 4887 4888 inline void 4889 UnicodeString::extractBetween(int32_t start, 4890 int32_t limit, 4891 char16_t *dst, 4892 int32_t dstStart) const { 4893 pinIndex(start); 4894 pinIndex(limit); 4895 doExtract(start, limit - start, dst, dstStart); 4896 } 4897 4898 inline UnicodeString 4899 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { 4900 return tempSubString(start, limit - start); 4901 } 4902 4903 inline char16_t 4904 UnicodeString::doCharAt(int32_t offset) const 4905 { 4906 if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(length())) { 4907 return getArrayStart()[offset]; 4908 } else { 4909 return kInvalidUChar; 4910 } 4911 } 4912 4913 inline char16_t 4914 UnicodeString::charAt(int32_t offset) const 4915 { return doCharAt(offset); } 4916 4917 inline char16_t 4918 UnicodeString::operator[] (int32_t offset) const 4919 { return doCharAt(offset); } 4920 4921 inline UBool 4922 UnicodeString::isEmpty() const { 4923 // Arithmetic or logical 
right shift does not matter: only testing for 0. 4924 return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0; 4925 } 4926 4927 //======================================== 4928 // Write implementation methods 4929 //======================================== 4930 inline void 4931 UnicodeString::setZeroLength() { 4932 fUnion.fFields.fLengthAndFlags &= kAllStorageFlags; 4933 } 4934 4935 inline void 4936 UnicodeString::setShortLength(int32_t len) { 4937 // requires 0 <= len <= kMaxShortLength 4938 fUnion.fFields.fLengthAndFlags = 4939 static_cast<int16_t>((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift)); 4940 } 4941 4942 inline void 4943 UnicodeString::setLength(int32_t len) { 4944 if(len <= kMaxShortLength) { 4945 setShortLength(len); 4946 } else { 4947 fUnion.fFields.fLengthAndFlags |= kLengthIsLarge; 4948 fUnion.fFields.fLength = len; 4949 } 4950 } 4951 4952 inline void 4953 UnicodeString::setToEmpty() { 4954 fUnion.fFields.fLengthAndFlags = kShortString; 4955 } 4956 4957 inline void 4958 UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) { 4959 setLength(len); 4960 fUnion.fFields.fArray = array; 4961 fUnion.fFields.fCapacity = capacity; 4962 } 4963 4964 inline UnicodeString& 4965 UnicodeString::operator= (char16_t ch) 4966 { return doReplace(0, length(), &ch, 0, 1); } 4967 4968 inline UnicodeString& 4969 UnicodeString::operator= (UChar32 ch) 4970 { return replace(0, length(), ch); } 4971 4972 inline UnicodeString& 4973 UnicodeString::setTo(const UnicodeString& srcText, 4974 int32_t srcStart, 4975 int32_t srcLength) 4976 { 4977 unBogus(); 4978 return doReplace(0, length(), srcText, srcStart, srcLength); 4979 } 4980 4981 inline UnicodeString& 4982 UnicodeString::setTo(const UnicodeString& srcText, 4983 int32_t srcStart) 4984 { 4985 unBogus(); 4986 srcText.pinIndex(srcStart); 4987 return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart); 4988 } 4989 4990 inline UnicodeString& 4991 
UnicodeString::setTo(const UnicodeString &srcText) {
  // Whole-string assignment is delegated to copyFrom().
  return copyFrom(srcText);
}

// Calls unBogus(), then replaces the whole contents with srcLength units of srcChars.
inline UnicodeString &UnicodeString::setTo(const char16_t *srcChars, int32_t srcLength) {
  unBogus();
  const int32_t oldLength = length();
  return doReplace(0, oldLength, srcChars, 0, srcLength);
}

// Calls unBogus(), then replaces the whole contents with the single code unit srcChar.
inline UnicodeString &UnicodeString::setTo(char16_t srcChar) {
  unBogus();
  const int32_t oldLength = length();
  return doReplace(0, oldLength, &srcChar, 0, 1);
}

// Calls unBogus(), then replaces the whole contents with the code point srcChar.
inline UnicodeString &UnicodeString::setTo(UChar32 srcChar) {
  unBogus();
  const int32_t oldLength = length();
  return replace(0, oldLength, srcChar);
}

// Appends a range of srcText.
inline UnicodeString &UnicodeString::append(const UnicodeString &srcText,
                                            int32_t srcStart,
                                            int32_t srcLength) {
  return doAppend(srcText, srcStart, srcLength);
}

// Appends all of srcText.
inline UnicodeString &UnicodeString::append(const UnicodeString &srcText) {
  return doAppend(srcText, 0, srcText.length());
}

// Appends a range of srcChars.
inline UnicodeString &UnicodeString::append(const char16_t *srcChars,
                                            int32_t srcStart,
                                            int32_t srcLength) {
  return doAppend(srcChars, srcStart, srcLength);
}

// Appends srcLength units of srcChars, taken from offset 0.
inline UnicodeString &UnicodeString::append(ConstChar16Ptr srcChars, int32_t srcLength) {
  return doAppend(srcChars, 0, srcLength);
}

// Appends the single code unit srcChar.
inline UnicodeString &UnicodeString::append(char16_t srcChar) {
  return doAppend(&srcChar, 0, 1);
}

// Appends the single code unit ch.
inline UnicodeString &UnicodeString::operator+=(char16_t ch) {
  return doAppend(&ch, 0, 1);
}

// Appends the code point ch via append(UChar32).
inline UnicodeString &UnicodeString::operator+=(UChar32 ch) {
  return append(ch);
}

// Appends all of srcText.
inline UnicodeString &UnicodeString::operator+=(const UnicodeString &srcText) {
  return doAppend(srcText, 0, srcText.length());
}

// Inserts a range of srcText at start: a doReplace() over a zero-length range.
inline UnicodeString &UnicodeString::insert(int32_t start,
                                            const UnicodeString &srcText,
                                            int32_t srcStart,
                                            int32_t srcLength) {
  return doReplace(start, 0, srcText, srcStart, srcLength);
}
5062 5063 inline UnicodeString& 5064 UnicodeString::insert(int32_t start, 5065 const UnicodeString& srcText) 5066 { return doReplace(start, 0, srcText, 0, srcText.length()); } 5067 5068 inline UnicodeString& 5069 UnicodeString::insert(int32_t start, 5070 const char16_t *srcChars, 5071 int32_t srcStart, 5072 int32_t srcLength) 5073 { return doReplace(start, 0, srcChars, srcStart, srcLength); } 5074 5075 inline UnicodeString& 5076 UnicodeString::insert(int32_t start, 5077 ConstChar16Ptr srcChars, 5078 int32_t srcLength) 5079 { return doReplace(start, 0, srcChars, 0, srcLength); } 5080 5081 inline UnicodeString& 5082 UnicodeString::insert(int32_t start, 5083 char16_t srcChar) 5084 { return doReplace(start, 0, &srcChar, 0, 1); } 5085 5086 inline UnicodeString& 5087 UnicodeString::insert(int32_t start, 5088 UChar32 srcChar) 5089 { return replace(start, 0, srcChar); } 5090 5091 5092 inline UnicodeString& 5093 UnicodeString::remove() 5094 { 5095 // remove() of a bogus string makes the string empty and non-bogus 5096 if(isBogus()) { 5097 setToEmpty(); 5098 } else { 5099 setZeroLength(); 5100 } 5101 return *this; 5102 } 5103 5104 inline UnicodeString& 5105 UnicodeString::remove(int32_t start, 5106 int32_t _length) 5107 { 5108 if(start <= 0 && _length == INT32_MAX) { 5109 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus 5110 return remove(); 5111 } 5112 return doReplace(start, _length, nullptr, 0, 0); 5113 } 5114 5115 inline UnicodeString& 5116 UnicodeString::removeBetween(int32_t start, 5117 int32_t limit) 5118 { return doReplace(start, limit - start, nullptr, 0, 0); } 5119 5120 inline UnicodeString & 5121 UnicodeString::retainBetween(int32_t start, int32_t limit) { 5122 truncate(limit); 5123 return doReplace(0, start, nullptr, 0, 0); 5124 } 5125 5126 inline UBool 5127 UnicodeString::truncate(int32_t targetLength) 5128 { 5129 if(isBogus() && targetLength == 0) { 5130 // truncate(0) of a bogus string makes the string empty and non-bogus 
5131 unBogus(); 5132 return false; 5133 } else if (static_cast<uint32_t>(targetLength) < static_cast<uint32_t>(length())) { 5134 setLength(targetLength); 5135 return true; 5136 } else { 5137 return false; 5138 } 5139 } 5140 5141 inline UnicodeString& 5142 UnicodeString::reverse() 5143 { return doReverse(0, length()); } 5144 5145 inline UnicodeString& 5146 UnicodeString::reverse(int32_t start, 5147 int32_t _length) 5148 { return doReverse(start, _length); } 5149 5150 U_NAMESPACE_END 5151 5152 #endif /* U_SHOW_CPLUSPLUS_API */ 5153 5154 #endif