[ tor-browser ].git.dasho

unistr.h (188891B)
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 1998-2016, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 *
      9 * File unistr.h
     10 *
     11 * Modification History:
     12 *
     13 *   Date        Name        Description
     14 *   09/25/98    stephen     Creation.
     15 *   11/11/98    stephen     Changed per 11/9 code review.
     16 *   04/20/99    stephen     Overhauled per 4/16 code review.
     17 *   11/18/99    aliu        Made to inherit from Replaceable.  Added method
     18 *                           handleReplaceBetween(); other methods unchanged.
     19 *   06/25/01    grhoten     Remove dependency on iostream.
     20 ******************************************************************************
     21 */
     22 
     23 #ifndef UNISTR_H
     24 #define UNISTR_H
     25 
     26 /**
     27 * \file
     28 * \brief C++ API: Unicode String
     29 */
     30 
     31 #include "unicode/utypes.h"
     32 
     33 #if U_SHOW_CPLUSPLUS_API
     34 
     35 #include <cstddef>
     36 #include <string_view>
     37 #include "unicode/char16ptr.h"
     38 #include "unicode/rep.h"
     39 #include "unicode/std_string.h"
     40 #include "unicode/stringpiece.h"
     41 #include "unicode/bytestream.h"
     42 
     43 struct UConverter;          // unicode/ucnv.h
     44 
     45 #ifndef USTRING_H
     46 /**
     47 * \ingroup ustring_ustrlen
     48 * @param s Pointer to sequence of UChars.
     49 * @return Length of sequence.
     50 */
     51 U_CAPI int32_t U_EXPORT2 u_strlen(const UChar *s);
     52 #endif
     53 
     54 U_NAMESPACE_BEGIN
     55 
     56 #if !UCONFIG_NO_BREAK_ITERATION
     57 class BreakIterator;        // unicode/brkiter.h
     58 #endif
     59 class Edits;
     60 
     61 U_NAMESPACE_END
     62 
     63 // Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
     64 /**
     65 * Internal string case mapping function type.
     66 * All error checking must be done.
     67 * src and dest must not overlap.
     68 * @internal
     69 */
     70 typedef int32_t U_CALLCONV
     71 UStringCaseMapper(int32_t caseLocale, uint32_t options,
     72 #if !UCONFIG_NO_BREAK_ITERATION
     73                  icu::BreakIterator *iter,  // parameter exists only when break iteration is compiled in
     74 #endif  // !UCONFIG_NO_BREAK_ITERATION
     75                  char16_t *dest, int32_t destCapacity,  // output buffer and its capacity; must not overlap src
     76                  const char16_t *src, int32_t srcLength,  // input buffer and its length; must not overlap dest
     77                  icu::Edits *edits,
     78                  UErrorCode &errorCode);
     79 
     80 U_NAMESPACE_BEGIN
     81 
     82 class Locale;               // unicode/locid.h
     83 class StringCharacterIterator;
     84 class UnicodeStringAppendable;  // unicode/appendable.h
     85 
     86 /* The <iostream> include has been moved to unicode/ustream.h */
     87 
     88 /**
     89 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
     90 * which constructs a Unicode string from an invariant-character char * string.
     91 * About invariant characters see utypes.h.
     92 * This constructor has no runtime dependency on conversion code and is
     93 * therefore recommended over ones taking a charset name string
     94 * (where the empty string "" indicates invariant-character conversion).
     95 *
     96 * @stable ICU 3.2
     97 */
     98 #define US_INV icu::UnicodeString::kInvariant  // short alias for the fully qualified enumerator
     99 
    100 /**
    101 * \def UNICODE_STRING
    102 * Obsolete macro approximating UnicodeString literals.
    103 *
    104 * Prior to the availability of C++11 and u"UTF-16 string literals",
    105 * this macro was provided for portability and efficiency when
    106 * initializing UnicodeStrings from literals.
    107 *
    108 * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
    109 * length determination:
    110 * \code
    111 * UnicodeString str(u"literal");
    112 * if (str == u"other literal") { ... }
    113 * \endcode
    114 *
    115 * The string parameter must be a C string literal.
    116 * The length of the string, not including the terminating
    117 * `NUL`, must be specified as a constant.
    118 * @stable ICU 2.0
    119 */
    120 #if !U_CHAR16_IS_TYPEDEF  // the pasted u"..." literal is passed to the constructor unchanged
    121 # define UNICODE_STRING(cs, _length) icu::UnicodeString(true, u ## cs, _length)
    122 #else  // char16_t is a typedef here: cast the pasted u"..." literal to const char16_t*
    123 # define UNICODE_STRING(cs, _length) icu::UnicodeString(true, (const char16_t*)u ## cs, _length)
    124 #endif  // !U_CHAR16_IS_TYPEDEF
    125 
    126 /**
    127 * Unicode String literals in C++.
    128 * Obsolete macro approximating UnicodeString literals.
    129 * See UNICODE_STRING.
    130 *
    131 * The string parameter must be a C string literal.
    132 * @stable ICU 2.0
    133 * @see UNICODE_STRING
    134 */
    135 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)  // forwards -1 as the length; presumably "derive it from the NUL terminator" - TODO confirm against the constructor docs
    136 
    137 /**
    138 * \def UNISTR_FROM_CHAR_EXPLICIT
    139 * This can be defined to be empty or "explicit".
    140 * If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32)
    141 * constructors are marked as explicit, preventing their inadvertent use.
    142 * @stable ICU 49
    143 */
    144 #ifndef UNISTR_FROM_CHAR_EXPLICIT  // a definition supplied by the build takes precedence
    145 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
    146    // ICU library code: the char16_t/UChar32 constructors become "explicit" automatically.
    147 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
    148 # else
    149    // All other code: the macro is empty by default so that existing source keeps compiling.
    150 #   define UNISTR_FROM_CHAR_EXPLICIT
    151 # endif  // U_*_IMPLEMENTATION
    152 #endif  // UNISTR_FROM_CHAR_EXPLICIT
    153 
    154 /**
    155 * \def UNISTR_FROM_STRING_EXPLICIT
    156 * This can be defined to be empty or "explicit".
    157 * If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *)
    158 * constructors are marked as explicit, preventing their inadvertent use.
    159 *
    160 * In particular, this helps prevent accidentally depending on ICU conversion code
    161 * by passing a string literal into an API with a const UnicodeString & parameter.
    162 * @stable ICU 49
    163 */
    164 #ifndef UNISTR_FROM_STRING_EXPLICIT  // a definition supplied by the build takes precedence
    165 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
    166    // ICU library code: the const char * / const char16_t * constructors become "explicit" automatically.
    167 #   define UNISTR_FROM_STRING_EXPLICIT explicit
    168 # else
    169    // All other code: the macro is empty by default so that existing source keeps compiling.
    170 #   define UNISTR_FROM_STRING_EXPLICIT
    171 # endif  // U_*_IMPLEMENTATION
    172 #endif  // UNISTR_FROM_STRING_EXPLICIT
    173 
    174 /**
    175 * \def UNISTR_OBJECT_SIZE
    176 * Desired sizeof(UnicodeString) in bytes.
    177 * It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
    178 * The object size may want to be a multiple of 16 bytes,
    179 * which is a common granularity for heap allocation.
    180 *
    181 * Any space inside the object beyond sizeof(vtable pointer) + 2
    182 * is available for storing short strings inside the object.
    183 * The bigger the object, the longer a string that can be stored inside the object,
    184 * without additional heap allocation.
    185 *
    186 * Depending on a platform's pointer size, pointer alignment requirements,
    187 * and struct padding, the compiler will usually round up sizeof(UnicodeString)
    188 * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
    189 * to hold the fields for heap-allocated strings.
    190 * Such a minimum size also ensures that the object is easily large enough
    191 * to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH).
    192 *
    193 * sizeof(UnicodeString) >= 48 should work for all known platforms.
    194 *
    195 * For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
    196 * sizeof(UnicodeString) = 64 would leave space for
    197 * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
    198 * char16_ts stored inside the object.
    199 *
    200 * The minimum object size on a 64-bit machine would be
    201 * 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
    202 * and the internal buffer would hold up to 11 char16_ts in that case.
    203 *
    204 * @see U16_MAX_LENGTH
    205 * @stable ICU 56
    206 */
    207 #ifndef UNISTR_OBJECT_SIZE  // a value predefined by the build takes precedence
    208 # define UNISTR_OBJECT_SIZE 64  // default desired sizeof(UnicodeString), in bytes
    209 #endif  // UNISTR_OBJECT_SIZE
    210 
    211 /**
    212 * UnicodeString is a string class that stores Unicode characters directly and provides
    213 * similar functionality as the Java String and StringBuffer/StringBuilder classes.
    214 * It is a concrete implementation of the abstract class Replaceable (for transliteration).
    215 *
    216 * The UnicodeString equivalent of std::string’s clear() is remove().
    217 *
    218 * Starting with ICU 78, a UnicodeString is a C++ "range" of char16_t code units.
    219 * utfStringCodePoints() and unsafeUTFStringCodePoints() can be used to iterate over
    220 * the code points.
    221 *
    222 * A UnicodeString may "alias" an external array of characters
    223 * (that is, point to it, rather than own the array)
    224 * whose lifetime must then at least match the lifetime of the aliasing object.
    225 * This aliasing may be preserved when returning a UnicodeString by value,
    226 * depending on the compiler and the function implementation,
    227 * via Return Value Optimization (RVO) or the move assignment operator.
    228 * (However, the copy assignment operator does not preserve aliasing.)
    229 * For details see the description of storage models at the end of the class API docs
    230 * and in the User Guide chapter linked from there.
    231 *
    232 * The UnicodeString class is not suitable for subclassing.
    233 *
    234 * For an overview of Unicode strings in C and C++ see the
    235 * [User Guide Strings chapter](https://unicode-org.github.io/icu/userguide/strings#strings-in-cc).
    236 *
    237 * In ICU, a Unicode string consists of 16-bit Unicode *code units*.
    238 * A Unicode character may be stored with either one code unit
    239 * (the most common case) or with a matched pair of special code units
    240 * ("surrogates"). The data type for code units is char16_t.
    241 * For single-character handling, a Unicode character code *point* is a value
    242 * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.
    243 *
    244 * Indexes and offsets into and lengths of strings always count code units, not code points.
    245 * This is the same as with multi-byte char* strings in traditional string handling.
    246 * Operations on partial strings typically do not test for code point boundaries.
    247 * If necessary, the user needs to take care of such boundaries by testing for the code unit
    248 * values or by using functions like
    249 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
    250 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).
    251 *
    252 * UnicodeString methods are more lenient with regard to input parameter values
    253 * than other ICU APIs. In particular:
    254 * - If indexes are out of bounds for a UnicodeString object
    255 *   (< 0 or > length()) then they are "pinned" to the nearest boundary.
    256 * - If the buffer passed to an insert/append/replace operation is owned by the
    257 *   target object, e.g., calling str.append(str), an extra copy may take place
    258 *   to ensure safety.
    259 * - If primitive string pointer values (e.g., const char16_t * or char *)
    260 *   for input strings are nullptr, then those input string parameters are treated
    261 *   as if they pointed to an empty string.
    262 *   However, this is *not* the case for char * parameters for charset names
    263 *   or other IDs.
    264 * - Most UnicodeString methods do not take a UErrorCode parameter because
    265 *   there are usually very few opportunities for failure other than a shortage
    266 *   of memory, error codes in low-level C++ string methods would be inconvenient,
    267 *   and the error code as the last parameter (ICU convention) would prevent
    268 *   the use of default parameter values.
    269 *   Instead, such methods set the UnicodeString into a "bogus" state
    270 *   (see isBogus()) if an error occurs.
    271 *
    272 * In string comparisons, two UnicodeString objects that are both "bogus"
    273 * compare equal (to be transitive and prevent endless loops in sorting),
    274 * and a "bogus" string compares less than any non-"bogus" one.
    275 *
    276 * Const UnicodeString methods are thread-safe. Multiple threads can use
    277 * const methods on the same UnicodeString object simultaneously,
    278 * but non-const methods must not be called concurrently (in multiple threads)
    279 * with any other (const or non-const) methods.
    280 *
    281 * Similarly, const UnicodeString & parameters are thread-safe.
    282 * One object may be passed in as such a parameter concurrently in multiple threads.
    283 * This includes the const UnicodeString & parameters for
    284 * copy construction, assignment, and cloning.
    285 *
    286 * UnicodeString uses several storage methods.
    287 * String contents can be stored inside the UnicodeString object itself,
    288 * in an allocated and shared buffer, or in an outside buffer that is "aliased".
    289 * Most of this is done transparently, but careful aliasing in particular provides
    290 * significant performance improvements.
    291 * Also, the internal buffer is accessible via special functions.
    292 * For details see the
    293 * [User Guide Strings chapter](https://unicode-org.github.io/icu/userguide/strings#maximizing-performance-with-the-unicodestring-storage-model).
    294 *
    295 * @see utf.h
    296 * @see utfiterator.h
    297 * @see utfStringCodePoints
    298 * @see unsafeUTFStringCodePoints
    299 * @see CharacterIterator
    300 * @stable ICU 2.0
    301 */
    302 class U_COMMON_API UnicodeString : public Replaceable
    303 {
    304 public:
    305  /** C++ boilerplate @internal */
    306  using value_type = char16_t;
    307 
    308  /**
    309   * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
    310   * which constructs a Unicode string from an invariant-character char * string.
    311   * Use the macro US_INV instead of the full qualification for this value.
    312   *
    313   * @see US_INV
    314   * @stable ICU 3.2
    315   */
    316  enum EInvariant {
    317    /** Sole enumerator; pass it (usually spelled US_INV) to the invariant-character constructor.
    318     * @see EInvariant
    319     * @stable ICU 3.2
    320     */
    321    kInvariant
    322  };
    323 
    324  //========================================
    325  // Read-only operations
    326  //========================================
    327 
    328  /* Comparison - bitwise only - for international comparison use collation */
    329 
    330  /**
    331   * Equality operator. Performs only bitwise comparison.
    332   * @param text The UnicodeString to compare to this one.
    333   * @return true if `text` contains the same characters as this one,
    334   * false otherwise.
    335   * @stable ICU 2.0
    336   */
    337  inline bool operator== (const UnicodeString& text) const;
    338 
    339  /**
    340   * Equality operator. Performs only bitwise comparison with `text`
    341   * which is, or which is implicitly convertible to,
    342   * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
    343   *
    344   * For performance, you can use UTF-16 string literals with compile-time
    345   * length determination:
    346   * \code
    347   * UnicodeString str = ...;
    348   * if (str == u"literal") { ... }
    349   * \endcode
    350   * @param text The string view to compare to this string.
    351   * @return true if `text` contains the same characters as this one, false otherwise.
    352   * @stable ICU 76
    353   */
    354  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
    355  inline bool operator==(const S &text) const {
    356    const std::u16string_view view(internal::toU16StringView(text));
    357    if (isBogus()) { return false; }  // a bogus string never equals a string view
    358    return static_cast<uint32_t>(length()) == view.length() && doEquals(view.data(), length());  // unsigned cast avoids a sign-compare warning
    359  }
    360 
    361  /**
    362   * Inequality operator. Performs only bitwise comparison.
    363   * @param text The UnicodeString to compare to this one.
    364   * @return false if `text` contains the same characters as this one,
    365   * true otherwise.
    366   * @stable ICU 2.0
    367   */
    368  inline bool operator!= (const UnicodeString& text) const;
    369 
    370  /**
    371   * Inequality operator. Performs only bitwise comparison with `text`
    372   * which is, or which is implicitly convertible to,
    373   * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
    374   *
    375   * For performance, you can use std::u16string_view literals with compile-time
    376   * length determination:
    377   * \code
    378   * #include &lt;string_view&gt;
    379   * using namespace std::string_view_literals;
    380   * UnicodeString str = ...;
    381   * if (str != u"literal"sv) { ... }
    382   * \endcode
    383   * @param text The string view to compare to this string.
    384   * @return false if `text` contains the same characters as this one, true otherwise.
    385   * @stable ICU 76
    386   */
    387  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
    388  inline bool operator!=(const S &text) const {
    389    return !this->operator==(text);  // exact negation of operator== for the same argument
    390  }
    391 
    392  /**
    393   * Greater than operator. Performs only bitwise comparison.
    394   * @param text The UnicodeString to compare to this one.
    395   * @return true if the characters in this are bitwise
    396   * greater than the characters in `text`, false otherwise
    397   * @stable ICU 2.0
    398   */
    399  inline UBool operator> (const UnicodeString& text) const;
    400 
    401  /**
    402   * Less than operator. Performs only bitwise comparison.
    403   * @param text The UnicodeString to compare to this one.
    404   * @return true if the characters in this are bitwise
    405   * less than the characters in `text`, false otherwise
    406   * @stable ICU 2.0
    407   */
    408  inline UBool operator< (const UnicodeString& text) const;
    409 
    410  /**
    411   * Greater than or equal operator. Performs only bitwise comparison.
    412   * @param text The UnicodeString to compare to this one.
    413   * @return true if the characters in this are bitwise
    414   * greater than or equal to the characters in `text`, false otherwise
    415   * @stable ICU 2.0
    416   */
    417  inline UBool operator>= (const UnicodeString& text) const;
    418 
    419  /**
    420   * Less than or equal operator. Performs only bitwise comparison.
    421   * @param text The UnicodeString to compare to this one.
    422   * @return true if the characters in this are bitwise
    423   * less than or equal to the characters in `text`, false otherwise
    424   * @stable ICU 2.0
    425   */
    426  inline UBool operator<= (const UnicodeString& text) const;
    427 
    428  /**
    429   * Compare the characters bitwise in this UnicodeString to
    430   * the characters in `text`.
    431   * @param text The UnicodeString to compare to this one.
    432   * @return The result of bitwise character comparison: 0 if this
    433   * contains the same characters as `text`, -1 if the characters in
    434   * this are bitwise less than the characters in `text`, +1 if the
    435   * characters in this are bitwise greater than the characters
    436   * in `text`.
    437   * @stable ICU 2.0
    438   */
    439  inline int8_t compare(const UnicodeString& text) const;
    440 
    441  /**
    442   * Compare the characters bitwise in the range
    443   * [`start`, `start + length`) with the characters
    444   * in the **entire string** `text`.
    445   * (The parameters "start" and "length" are not applied to the other text "text".)
    446   * @param start the offset at which the compare operation begins
    447   * @param length the number of characters of text to compare.
    448   * @param text the other text to be compared against this string.
    449   * @return The result of bitwise character comparison: 0 if this
    450   * contains the same characters as `text`, -1 if the characters in
    451   * this are bitwise less than the characters in `text`, +1 if the
    452   * characters in this are bitwise greater than the characters
    453   * in `text`.
    454   * @stable ICU 2.0
    455   */
    456  inline int8_t compare(int32_t start,
    457         int32_t length,
    458         const UnicodeString& text) const;
    459 
    460  /**
    461   * Compare the characters bitwise in the range
    462   * [`start`, `start + length`) with the characters
    463   * in `srcText` in the range
    464   * [`srcStart`, `srcStart + srcLength`).
    465   * @param start the offset at which the compare operation begins
    466   * @param length the number of characters in this to compare.
    467   * @param srcText the text to be compared
    468   * @param srcStart the offset into `srcText` to start comparison
    469   * @param srcLength the number of characters in `srcText` to compare
    470   * @return The result of bitwise character comparison: 0 if this
    471   * contains the same characters as `srcText`, -1 if the characters in
    472   * this are bitwise less than the characters in `srcText`, +1 if the
    473   * characters in this are bitwise greater than the characters
    474   * in `srcText`.
    475   * @stable ICU 2.0
    476   */
    477   inline int8_t compare(int32_t start,
    478         int32_t length,
    479         const UnicodeString& srcText,
    480         int32_t srcStart,
    481         int32_t srcLength) const;
    482 
    483  /**
    484   * Compare the characters bitwise in this UnicodeString with the first
    485   * `srcLength` characters in `srcChars`.
    486   * @param srcChars The characters to compare to this UnicodeString.
    487   * @param srcLength the number of characters in `srcChars` to compare
    488   * @return The result of bitwise character comparison: 0 if this
    489   * contains the same characters as `srcChars`, -1 if the characters in
    490   * this are bitwise less than the characters in `srcChars`, +1 if the
    491   * characters in this are bitwise greater than the characters
    492   * in `srcChars`.
    493   * @stable ICU 2.0
    494   */
    495  inline int8_t compare(ConstChar16Ptr srcChars,
    496         int32_t srcLength) const;
    497 
    498  /**
    499   * Compare the characters bitwise in the range
    500   * [`start`, `start + length`) with the first
    501   * `length` characters in `srcChars`.
    502   * @param start the offset at which the compare operation begins
    503   * @param length the number of characters to compare.
    504   * @param srcChars the characters to be compared
    505   * @return The result of bitwise character comparison: 0 if this
    506   * contains the same characters as `srcChars`, -1 if the characters in
    507   * this are bitwise less than the characters in `srcChars`, +1 if the
    508   * characters in this are bitwise greater than the characters
    509   * in `srcChars`.
    510   * @stable ICU 2.0
    511   */
    512  inline int8_t compare(int32_t start,
    513         int32_t length,
    514         const char16_t *srcChars) const;
    515 
    516  /**
    517   * Compare the characters bitwise in the range
    518   * [`start`, `start + length`) with the characters
    519   * in `srcChars` in the range
    520   * [`srcStart`, `srcStart + srcLength`).
    521   * @param start the offset at which the compare operation begins
    522   * @param length the number of characters in this to compare
    523   * @param srcChars the characters to be compared
    524   * @param srcStart the offset into `srcChars` to start comparison
    525   * @param srcLength the number of characters in `srcChars` to compare
    526   * @return The result of bitwise character comparison: 0 if this
    527   * contains the same characters as `srcChars`, -1 if the characters in
    528   * this are bitwise less than the characters in `srcChars`, +1 if the
    529   * characters in this are bitwise greater than the characters
    530   * in `srcChars`.
    531   * @stable ICU 2.0
    532   */
    533  inline int8_t compare(int32_t start,
    534         int32_t length,
    535         const char16_t *srcChars,
    536         int32_t srcStart,
    537         int32_t srcLength) const;
    538 
    539  /**
    540   * Compare the characters bitwise in the range
    541   * [`start`, `limit`) with the characters
    542   * in `srcText` in the range
    543   * [`srcStart`, `srcLimit`).
    544   * @param start the offset at which the compare operation begins
    545   * @param limit the offset immediately following the compare operation
    546   * @param srcText the text to be compared
    547   * @param srcStart the offset into `srcText` to start comparison
    548   * @param srcLimit the offset into `srcText` to limit comparison
    549   * @return The result of bitwise character comparison: 0 if this
    550   * contains the same characters as `srcText`, -1 if the characters in
    551   * this are bitwise less than the characters in `srcText`, +1 if the
    552   * characters in this are bitwise greater than the characters
    553   * in `srcText`.
    554   * @stable ICU 2.0
    555   */
    556  inline int8_t compareBetween(int32_t start,
    557            int32_t limit,
    558            const UnicodeString& srcText,
    559            int32_t srcStart,
    560            int32_t srcLimit) const;
    561 
    562  /**
    563   * Compare two Unicode strings in code point order.
    564   * The result may be different from the results of compare(), operator<, etc.
    565   * if supplementary characters are present:
    566   *
    567   * In UTF-16, supplementary characters (with code points U+10000 and above) are
    568   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
    569   * which means that they compare as less than some other BMP characters like U+feff.
    570   * This function compares Unicode strings in code point order.
    571   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
    572   *
    573   * @param text Another string to compare this one to.
    574   * @return a negative/zero/positive integer corresponding to whether
    575   * this string is less than/equal to/greater than the second one
    576   * in code point order
    577   * @stable ICU 2.0
    578   */
    579  inline int8_t compareCodePointOrder(const UnicodeString& text) const;
    580 
    581  /**
    582   * Compare two Unicode strings in code point order.
    583   * The result may be different from the results of compare(), operator<, etc.
    584   * if supplementary characters are present:
    585   *
    586   * In UTF-16, supplementary characters (with code points U+10000 and above) are
    587   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
    588   * which means that they compare as less than some other BMP characters like U+feff.
    589   * This function compares Unicode strings in code point order.
    590   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
    591   *
    592   * @param start The start offset in this string at which the compare operation begins.
    593   * @param length The number of code units from this string to compare.
    594   * @param srcText Another string to compare this one to.
    595   * @return a negative/zero/positive integer corresponding to whether
    596   * this string is less than/equal to/greater than the second one
    597   * in code point order
    598   * @stable ICU 2.0
    599   */
    600  inline int8_t compareCodePointOrder(int32_t start,
    601                                      int32_t length,
    602                                      const UnicodeString& srcText) const;
    603 
    604  /**
    605   * Compare two Unicode strings in code point order.
    606   * The result may be different from the results of compare(), operator<, etc.
    607   * if supplementary characters are present:
    608   *
    609   * In UTF-16, supplementary characters (with code points U+10000 and above) are
    610   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
    611   * which means that they compare as less than some other BMP characters like U+feff.
    612   * This function compares Unicode strings in code point order.
    613   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
    614   *
    615   * @param start The start offset in this string at which the compare operation begins.
    616   * @param length The number of code units from this string to compare.
    617   * @param srcText Another string to compare this one to.
    618   * @param srcStart The start offset in that string at which the compare operation begins.
    619   * @param srcLength The number of code units from that string to compare.
    620   * @return a negative/zero/positive integer corresponding to whether
    621   * this string is less than/equal to/greater than the second one
    622   * in code point order
    623   * @stable ICU 2.0
    624   */
    625   inline int8_t compareCodePointOrder(int32_t start,
    626                                       int32_t length,
    627                                       const UnicodeString& srcText,
    628                                       int32_t srcStart,
    629                                       int32_t srcLength) const;
    630 
    631  /**
    632   * Compare two Unicode strings in code point order.
    633   * The result may be different from the results of compare(), operator<, etc.
    634   * if supplementary characters are present:
    635   *
    636   * In UTF-16, supplementary characters (with code points U+10000 and above) are
    637   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
    638   * which means that they compare as less than some other BMP characters like U+feff.
    639   * This function compares Unicode strings in code point order.
    640   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
    641   *
    642   * @param srcChars A pointer to another string to compare this one to.
    643   * @param srcLength The number of code units from that string to compare.
    644   * @return a negative/zero/positive integer corresponding to whether
    645   * this string is less than/equal to/greater than the second one
    646   * in code point order
    647   * @stable ICU 2.0
    648   */
    649  inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars,
    650                                      int32_t srcLength) const;
    651 
    652  /**
    653   * Compare two Unicode strings in code point order.
    654   * The result may be different from the results of compare(), operator<, etc.
    655   * if supplementary characters are present:
    656   *
    657   * In UTF-16, supplementary characters (with code points U+10000 and above) are
    658   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
    659   * which means that they compare as less than some other BMP characters like U+feff.
    660   * This function compares Unicode strings in code point order.
    661   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
    662   *
    663   * @param start The start offset in this string at which the compare operation begins.
    664   * @param length The number of code units from this string to compare.
    665   * @param srcChars A pointer to another string to compare this one to.
    666   * @return a negative/zero/positive integer corresponding to whether
    667   * this string is less than/equal to/greater than the second one
    668   * in code point order
    669   * @stable ICU 2.0
    670   */
    671  inline int8_t compareCodePointOrder(int32_t start,
    672                                      int32_t length,
    673                                      const char16_t *srcChars) const;
    674 
    675  /**
    676   * Compare two Unicode strings in code point order.
    677   * The result may be different from the results of compare(), operator<, etc.
    678   * if supplementary characters are present:
    679   *
    680   * In UTF-16, supplementary characters (with code points U+10000 and above) are
    681   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
    682   * which means that they compare as less than some other BMP characters like U+feff.
    683   * This function compares Unicode strings in code point order.
    684   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
    685   *
    686   * @param start The start offset in this string at which the compare operation begins.
    687   * @param length The number of code units from this string to compare.
    688   * @param srcChars A pointer to another string to compare this one to.
    689   * @param srcStart The start offset in that string at which the compare operation begins.
    690   * @param srcLength The number of code units from that string to compare.
    691   * @return a negative/zero/positive integer corresponding to whether
    692   * this string is less than/equal to/greater than the second one
    693   * in code point order
    694   * @stable ICU 2.0
    695   */
    696  inline int8_t compareCodePointOrder(int32_t start,
    697                                      int32_t length,
    698                                      const char16_t *srcChars,
    699                                      int32_t srcStart,
    700                                      int32_t srcLength) const;
    701 
    702  /**
    703   * Compare two Unicode strings in code point order.
    704   * The result may be different from the results of compare(), operator<, etc.
    705   * if supplementary characters are present:
    706   *
    707   * In UTF-16, supplementary characters (with code points U+10000 and above) are
    708   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
    709   * which means that they compare as less than some other BMP characters like U+feff.
    710   * This function compares Unicode strings in code point order.
    711   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
    712   *
    713   * @param start The start offset in this string at which the compare operation begins.
    714   * @param limit The offset after the last code unit from this string to compare.
    715   * @param srcText Another string to compare this one to.
    716   * @param srcStart The start offset in that string at which the compare operation begins.
    717   * @param srcLimit The offset after the last code unit from that string to compare.
    718   * @return a negative/zero/positive integer corresponding to whether
    719   * this string is less than/equal to/greater than the second one
    720   * in code point order
    721   * @stable ICU 2.0
    722   */
    723  inline int8_t compareCodePointOrderBetween(int32_t start,
    724                                             int32_t limit,
    725                                             const UnicodeString& srcText,
    726                                             int32_t srcStart,
    727                                             int32_t srcLimit) const;
    728 
    729  /**
    730   * Compare two strings case-insensitively using full case folding.
    731   * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
    732   *
    733   * @param text Another string to compare this one to.
    734   * @param options A bit set of options:
    735   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    736   *     Comparison in code unit order with default case folding.
    737   *
    738   *   - U_COMPARE_CODE_POINT_ORDER
    739   *     Set to choose code point order instead of code unit order
    740   *     (see u_strCompare for details).
    741   *
    742   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    743   *
    744   * @return A negative, zero, or positive integer indicating the comparison result.
    745   * @stable ICU 2.0
    746   */
    747  inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
    748 
    749  /**
    750   * Compare two strings case-insensitively using full case folding.
    751   * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
    752   *
    753   * @param start The start offset in this string at which the compare operation begins.
    754   * @param length The number of code units from this string to compare.
    755   * @param srcText Another string to compare this one to.
    756   * @param options A bit set of options:
    757   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    758   *     Comparison in code unit order with default case folding.
    759   *
    760   *   - U_COMPARE_CODE_POINT_ORDER
    761   *     Set to choose code point order instead of code unit order
    762   *     (see u_strCompare for details).
    763   *
    764   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    765   *
    766   * @return A negative, zero, or positive integer indicating the comparison result.
    767   * @stable ICU 2.0
    768   */
    769  inline int8_t caseCompare(int32_t start,
    770         int32_t length,
    771         const UnicodeString& srcText,
    772         uint32_t options) const;
    773 
    774  /**
    775   * Compare two strings case-insensitively using full case folding.
    776   * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
    777   *
    778   * @param start The start offset in this string at which the compare operation begins.
    779   * @param length The number of code units from this string to compare.
    780   * @param srcText Another string to compare this one to.
    781   * @param srcStart The start offset in that string at which the compare operation begins.
    782   * @param srcLength The number of code units from that string to compare.
    783   * @param options A bit set of options:
    784   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    785   *     Comparison in code unit order with default case folding.
    786   *
    787   *   - U_COMPARE_CODE_POINT_ORDER
    788   *     Set to choose code point order instead of code unit order
    789   *     (see u_strCompare for details).
    790   *
    791   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    792   *
    793   * @return A negative, zero, or positive integer indicating the comparison result.
    794   * @stable ICU 2.0
    795   */
    796  inline int8_t caseCompare(int32_t start,
    797         int32_t length,
    798         const UnicodeString& srcText,
    799         int32_t srcStart,
    800         int32_t srcLength,
    801         uint32_t options) const;
    802 
    803  /**
    804   * Compare two strings case-insensitively using full case folding.
    805   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
    806   *
    807   * @param srcChars A pointer to another string to compare this one to.
    808   * @param srcLength The number of code units from that string to compare.
    809   * @param options A bit set of options:
    810   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    811   *     Comparison in code unit order with default case folding.
    812   *
    813   *   - U_COMPARE_CODE_POINT_ORDER
    814   *     Set to choose code point order instead of code unit order
    815   *     (see u_strCompare for details).
    816   *
    817   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    818   *
    819   * @return A negative, zero, or positive integer indicating the comparison result.
    820   * @stable ICU 2.0
    821   */
    822  inline int8_t caseCompare(ConstChar16Ptr srcChars,
    823         int32_t srcLength,
    824         uint32_t options) const;
    825 
    826  /**
    827   * Compare two strings case-insensitively using full case folding.
    828   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
    829   *
    830   * @param start The start offset in this string at which the compare operation begins.
    831   * @param length The number of code units from this string to compare.
    832   * @param srcChars A pointer to another string to compare this one to.
    833   * @param options A bit set of options:
    834   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    835   *     Comparison in code unit order with default case folding.
    836   *
    837   *   - U_COMPARE_CODE_POINT_ORDER
    838   *     Set to choose code point order instead of code unit order
    839   *     (see u_strCompare for details).
    840   *
    841   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    842   *
    843   * @return A negative, zero, or positive integer indicating the comparison result.
    844   * @stable ICU 2.0
    845   */
    846  inline int8_t caseCompare(int32_t start,
    847         int32_t length,
    848         const char16_t *srcChars,
    849         uint32_t options) const;
    850 
    851  /**
    852   * Compare two strings case-insensitively using full case folding.
    853   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
    854   *
    855   * @param start The start offset in this string at which the compare operation begins.
    856   * @param length The number of code units from this string to compare.
    857   * @param srcChars A pointer to another string to compare this one to.
    858   * @param srcStart The start offset in that string at which the compare operation begins.
    859   * @param srcLength The number of code units from that string to compare.
    860   * @param options A bit set of options:
    861   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    862   *     Comparison in code unit order with default case folding.
    863   *
    864   *   - U_COMPARE_CODE_POINT_ORDER
    865   *     Set to choose code point order instead of code unit order
    866   *     (see u_strCompare for details).
    867   *
    868   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    869   *
    870   * @return A negative, zero, or positive integer indicating the comparison result.
    871   * @stable ICU 2.0
    872   */
    873  inline int8_t caseCompare(int32_t start,
    874         int32_t length,
    875         const char16_t *srcChars,
    876         int32_t srcStart,
    877         int32_t srcLength,
    878         uint32_t options) const;
    879 
    880  /**
    881   * Compare two strings case-insensitively using full case folding.
    882   * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
    883   *
    884   * @param start The start offset in this string at which the compare operation begins.
    885   * @param limit The offset after the last code unit from this string to compare.
    886   * @param srcText Another string to compare this one to.
    887   * @param srcStart The start offset in that string at which the compare operation begins.
    888   * @param srcLimit The offset after the last code unit from that string to compare.
    889   * @param options A bit set of options:
    890   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    891   *     Comparison in code unit order with default case folding.
    892   *
    893   *   - U_COMPARE_CODE_POINT_ORDER
    894   *     Set to choose code point order instead of code unit order
    895   *     (see u_strCompare for details).
    896   *
    897   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    898   *
    899   * @return A negative, zero, or positive integer indicating the comparison result.
    900   * @stable ICU 2.0
    901   */
    902  inline int8_t caseCompareBetween(int32_t start,
    903            int32_t limit,
    904            const UnicodeString& srcText,
    905            int32_t srcStart,
    906            int32_t srcLimit,
    907            uint32_t options) const;
    908 
    909  /**
    910   * Determine if this starts with the characters in `text`
    911   * @param text The text to match.
    912   * @return true if this starts with the characters in `text`,
    913   * false otherwise
    914   * @stable ICU 2.0
    915   */
    916  inline UBool startsWith(const UnicodeString& text) const;
    917 
    918  /**
    919   * Determine if this starts with the characters in `srcText`
    920   * in the range [`srcStart`, `srcStart + srcLength`).
    921   * @param srcText The text to match.
    922   * @param srcStart the offset into `srcText` to start matching
    923   * @param srcLength the number of characters in `srcText` to match
    924   * @return true if this starts with the characters in `srcText`,
    925   * false otherwise
    926   * @stable ICU 2.0
    927   */
    928  inline UBool startsWith(const UnicodeString& srcText,
    929            int32_t srcStart,
    930            int32_t srcLength) const;
    931 
    932  /**
    933   * Determine if this starts with the characters in `srcChars`
    934   * @param srcChars The characters to match.
    935   * @param srcLength the number of characters in `srcChars`
    936   * @return true if this starts with the characters in `srcChars`,
    937   * false otherwise
    938   * @stable ICU 2.0
    939   */
    940  inline UBool startsWith(ConstChar16Ptr srcChars,
    941            int32_t srcLength) const;
    942 
    943  /**
    944   * Determine if this starts with the characters in `srcChars`
    945   * in the range  [`srcStart`, `srcStart + srcLength`).
    946   * @param srcChars The characters to match.
    947   * @param srcStart the offset into `srcChars` to start matching
    948   * @param srcLength the number of characters in `srcChars` to match
    949   * @return true if this starts with the characters in `srcChars`, false otherwise
    950   * @stable ICU 2.0
    951   */
    952  inline UBool startsWith(const char16_t *srcChars,
    953            int32_t srcStart,
    954            int32_t srcLength) const;
    955 
    956  /**
    957   * Determine if this ends with the characters in `text`
    958   * @param text The text to match.
    959   * @return true if this ends with the characters in `text`,
    960   * false otherwise
    961   * @stable ICU 2.0
    962   */
    963  inline UBool endsWith(const UnicodeString& text) const;
    964 
    965  /**
    966   * Determine if this ends with the characters in `srcText`
    967   * in the range [`srcStart`, `srcStart + srcLength`).
    968   * @param srcText The text to match.
    969   * @param srcStart the offset into `srcText` to start matching
    970   * @param srcLength the number of characters in `srcText` to match
    971   * @return true if this ends with the characters in `srcText`,
    972   * false otherwise
    973   * @stable ICU 2.0
    974   */
    975  inline UBool endsWith(const UnicodeString& srcText,
    976          int32_t srcStart,
    977          int32_t srcLength) const;
    978 
    979  /**
    980   * Determine if this ends with the characters in `srcChars`
    981   * @param srcChars The characters to match.
    982   * @param srcLength the number of characters in `srcChars`
    983   * @return true if this ends with the characters in `srcChars`,
    984   * false otherwise
    985   * @stable ICU 2.0
    986   */
    987  inline UBool endsWith(ConstChar16Ptr srcChars,
    988          int32_t srcLength) const;
    989 
    990  /**
    991   * Determine if this ends with the characters in `srcChars`
    992   * in the range  [`srcStart`, `srcStart + srcLength`).
    993   * @param srcChars The characters to match.
    994   * @param srcStart the offset into `srcChars` to start matching
    995   * @param srcLength the number of characters in `srcChars` to match
    996   * @return true if this ends with the characters in `srcChars`,
    997   * false otherwise
    998   * @stable ICU 2.0
    999   */
   1000  inline UBool endsWith(const char16_t *srcChars,
   1001          int32_t srcStart,
   1002          int32_t srcLength) const;
   1003 
   1004 
   1005  /* Searching - bitwise only */
   1006 
   1007  /**
   1008   * Locate in this the first occurrence of the characters in `text`,
   1009   * using bitwise comparison.
   1010   * @param text The text to search for.
   1011   * @return The offset into this of the start of `text`,
   1012   * or -1 if not found.
   1013   * @stable ICU 2.0
   1014   */
   1015  inline int32_t indexOf(const UnicodeString& text) const;
   1016 
   1017  /**
   1018   * Locate in this the first occurrence of the characters in `text`
   1019   * starting at offset `start`, using bitwise comparison.
   1020   * @param text The text to search for.
   1021   * @param start The offset at which searching will start.
   1022   * @return The offset into this of the start of `text`,
   1023   * or -1 if not found.
   1024   * @stable ICU 2.0
   1025   */
   1026  inline int32_t indexOf(const UnicodeString& text,
   1027              int32_t start) const;
   1028 
   1029  /**
   1030   * Locate in this the first occurrence in the range
   1031   * [`start`, `start + length`) of the characters
   1032   * in `text`, using bitwise comparison.
   1033   * @param text The text to search for.
   1034   * @param start The offset at which searching will start.
   1035   * @param length The number of characters to search
   1036   * @return The offset into this of the start of `text`,
   1037   * or -1 if not found.
   1038   * @stable ICU 2.0
   1039   */
   1040  inline int32_t indexOf(const UnicodeString& text,
   1041              int32_t start,
   1042              int32_t length) const;
   1043 
   1044  /**
   1045   * Locate in this the first occurrence in the range
   1046   * [`start`, `start + length`) of the characters
   1047   *  in `srcText` in the range
   1048   * [`srcStart`, `srcStart + srcLength`),
   1049   * using bitwise comparison.
   1050   * @param srcText The text to search for.
   1051   * @param srcStart the offset into `srcText` at which
   1052   * to start matching
   1053   * @param srcLength the number of characters in `srcText` to match
   1054   * @param start the offset into this at which to start matching
   1055   * @param length the number of characters in this to search
   1056   * @return The offset into this of the start of `srcText`,
   1057   * or -1 if not found.
   1058   * @stable ICU 2.0
   1059   */
   1060  inline int32_t indexOf(const UnicodeString& srcText,
   1061              int32_t srcStart,
   1062              int32_t srcLength,
   1063              int32_t start,
   1064              int32_t length) const;
   1065 
   1066  /**
   1067   * Locate in this the first occurrence of the characters in
   1068   * `srcChars`
   1069   * starting at offset `start`, using bitwise comparison.
   1070   * @param srcChars The text to search for.
   1071   * @param srcLength the number of characters in `srcChars` to match
   1072   * @param start the offset into this at which to start matching
   1073   * @return The offset into this of the start of `srcChars`,
   1074   * or -1 if not found.
   1075   * @stable ICU 2.0
   1076   */
   1077  inline int32_t indexOf(const char16_t *srcChars,
   1078              int32_t srcLength,
   1079              int32_t start) const;
   1080 
   1081  /**
   1082   * Locate in this the first occurrence in the range
   1083   * [`start`, `start + length`) of the characters
   1084   * in `srcChars`, using bitwise comparison.
   1085   * @param srcChars The text to search for.
   1086   * @param srcLength the number of characters in `srcChars`
   1087   * @param start The offset at which searching will start.
   1088   * @param length The number of characters to search
   1089   * @return The offset into this of the start of `srcChars`,
   1090   * or -1 if not found.
   1091   * @stable ICU 2.0
   1092   */
   1093  inline int32_t indexOf(ConstChar16Ptr srcChars,
   1094              int32_t srcLength,
   1095              int32_t start,
   1096              int32_t length) const;
   1097 
   1098  /**
   1099   * Locate in this the first occurrence in the range
   1100   * [`start`, `start + length`) of the characters
   1101   * in `srcChars` in the range
   1102   * [`srcStart`, `srcStart + srcLength`),
   1103   * using bitwise comparison.
   1104   * @param srcChars The text to search for.
   1105   * @param srcStart the offset into `srcChars` at which
   1106   * to start matching
   1107   * @param srcLength the number of characters in `srcChars` to match
   1108   * @param start the offset into this at which to start matching
   1109   * @param length the number of characters in this to search
   1110   * @return The offset into this of the start of `srcChars`,
   1111   * or -1 if not found.
   1112   * @stable ICU 2.0
   1113   */
   1114  int32_t indexOf(const char16_t *srcChars,
   1115              int32_t srcStart,
   1116              int32_t srcLength,
   1117              int32_t start,
   1118              int32_t length) const;
   1119 
   1120  /**
   1121   * Locate in this the first occurrence of the BMP code point `c`,
   1122   * using bitwise comparison.
   1123   * @param c The code unit to search for.
   1124   * @return The offset into this of `c`, or -1 if not found.
   1125   * @stable ICU 2.0
   1126   */
   1127  inline int32_t indexOf(char16_t c) const;
   1128 
   1129  /**
   1130   * Locate in this the first occurrence of the code point `c`,
   1131   * using bitwise comparison.
   1132   *
   1133   * @param c The code point to search for.
   1134   * @return The offset into this of `c`, or -1 if not found.
   1135   * @stable ICU 2.0
   1136   */
   1137  inline int32_t indexOf(UChar32 c) const;
   1138 
   1139  /**
   1140   * Locate in this the first occurrence of the BMP code point `c`,
   1141   * starting at offset `start`, using bitwise comparison.
   1142   * @param c The code unit to search for.
   1143   * @param start The offset at which searching will start.
   1144   * @return The offset into this of `c`, or -1 if not found.
   1145   * @stable ICU 2.0
   1146   */
   1147  inline int32_t indexOf(char16_t c,
   1148              int32_t start) const;
   1149 
   1150  /**
   1151   * Locate in this the first occurrence of the code point `c`
   1152   * starting at offset `start`, using bitwise comparison.
   1153   *
   1154   * @param c The code point to search for.
   1155   * @param start The offset at which searching will start.
   1156   * @return The offset into this of `c`, or -1 if not found.
   1157   * @stable ICU 2.0
   1158   */
   1159  inline int32_t indexOf(UChar32 c,
   1160              int32_t start) const;
   1161 
   1162  /**
   1163   * Locate in this the first occurrence of the BMP code point `c`
   1164   * in the range [`start`, `start + length`),
   1165   * using bitwise comparison.
   1166   * @param c The code unit to search for.
   1167   * @param start the offset into this at which to start matching
   1168   * @param length the number of characters in this to search
   1169   * @return The offset into this of `c`, or -1 if not found.
   1170   * @stable ICU 2.0
   1171   */
   1172  inline int32_t indexOf(char16_t c,
   1173              int32_t start,
   1174              int32_t length) const;
   1175 
   1176  /**
   1177   * Locate in this the first occurrence of the code point `c`
   1178   * in the range [`start`, `start + length`),
   1179   * using bitwise comparison.
   1180   *
   1181   * @param c The code point to search for.
   1182   * @param start the offset into this at which to start matching
   1183   * @param length the number of characters in this to search
   1184   * @return The offset into this of `c`, or -1 if not found.
   1185   * @stable ICU 2.0
   1186   */
   1187  inline int32_t indexOf(UChar32 c,
   1188              int32_t start,
   1189              int32_t length) const;
   1190 
   1191  /**
   1192   * Locate in this the last occurrence of the characters in `text`,
   1193   * using bitwise comparison.
   1194   * @param text The text to search for.
   1195   * @return The offset into this of the start of `text`,
   1196   * or -1 if not found.
   1197   * @stable ICU 2.0
   1198   */
   1199  inline int32_t lastIndexOf(const UnicodeString& text) const;
   1200 
   1201  /**
   1202   * Locate in this the last occurrence of the characters in `text`
   1203   * starting at offset `start`, using bitwise comparison.
   1204   * @param text The text to search for.
   1205   * @param start The offset at which searching will start.
   1206   * @return The offset into this of the start of `text`,
   1207   * or -1 if not found.
   1208   * @stable ICU 2.0
   1209   */
   1210  inline int32_t lastIndexOf(const UnicodeString& text,
   1211              int32_t start) const;
   1212 
   1213  /**
   1214   * Locate in this the last occurrence in the range
   1215   * [`start`, `start + length`) of the characters
   1216   * in `text`, using bitwise comparison.
   1217   * @param text The text to search for.
   1218   * @param start The offset at which searching will start.
   1219   * @param length The number of characters to search
   1220   * @return The offset into this of the start of `text`,
   1221   * or -1 if not found.
   1222   * @stable ICU 2.0
   1223   */
   1224  inline int32_t lastIndexOf(const UnicodeString& text,
   1225              int32_t start,
   1226              int32_t length) const;
   1227 
   1228  /**
   1229   * Locate in this the last occurrence in the range
   1230   * [`start`, `start + length`) of the characters
   1231   * in `srcText` in the range
   1232   * [`srcStart`, `srcStart + srcLength`),
   1233   * using bitwise comparison.
   1234   * @param srcText The text to search for.
   1235   * @param srcStart the offset into `srcText` at which
   1236   * to start matching
   1237   * @param srcLength the number of characters in `srcText` to match
   1238   * @param start the offset into this at which to start matching
   1239   * @param length the number of characters in this to search
   1240   * @return The offset into this of the start of `srcText`,
   1241   * or -1 if not found.
   1242   * @stable ICU 2.0
   1243   */
   1244  inline int32_t lastIndexOf(const UnicodeString& srcText,
   1245              int32_t srcStart,
   1246              int32_t srcLength,
   1247              int32_t start,
   1248              int32_t length) const;
   1249 
   1250  /**
   1251   * Locate in this the last occurrence of the characters in `srcChars`
   1252   * starting at offset `start`, using bitwise comparison.
   1253   * @param srcChars The text to search for.
   1254   * @param srcLength the number of characters in `srcChars` to match
   1255   * @param start the offset into this at which to start matching
   1256   * @return The offset into this of the start of `srcChars`,
   1257   * or -1 if not found.
   1258   * @stable ICU 2.0
   1259   */
   1260  inline int32_t lastIndexOf(const char16_t *srcChars,
   1261              int32_t srcLength,
   1262              int32_t start) const;
   1263 
   1264  /**
   1265   * Locate in this the last occurrence in the range
   1266   * [`start`, `start + length`) of the characters
   1267   * in `srcChars`, using bitwise comparison.
   1268   * @param srcChars The text to search for.
   1269   * @param srcLength the number of characters in `srcChars`
   1270   * @param start The offset at which searching will start.
   1271   * @param length The number of characters to search
   1272   * @return The offset into this of the start of `srcChars`,
   1273   * or -1 if not found.
   1274   * @stable ICU 2.0
   1275   */
   1276  inline int32_t lastIndexOf(ConstChar16Ptr srcChars,
   1277              int32_t srcLength,
   1278              int32_t start,
   1279              int32_t length) const;
   1280 
   1281  /**
   1282   * Locate in this the last occurrence in the range
   1283   * [`start`, `start + length`) of the characters
   1284   * in `srcChars` in the range
   1285   * [`srcStart`, `srcStart + srcLength`),
   1286   * using bitwise comparison.
   1287   * @param srcChars The text to search for.
   1288   * @param srcStart the offset into `srcChars` at which
   1289   * to start matching
   1290   * @param srcLength the number of characters in `srcChars` to match
   1291   * @param start the offset into this at which to start matching
   1292   * @param length the number of characters in this to search
   1293   * @return The offset into this of the start of `srcChars`,
   1294   * or -1 if not found.
   1295   * @stable ICU 2.0
   1296   */
   1297  int32_t lastIndexOf(const char16_t *srcChars,
   1298              int32_t srcStart,
   1299              int32_t srcLength,
   1300              int32_t start,
   1301              int32_t length) const;
   1302 
   1303  /**
   1304   * Locate in this the last occurrence of the BMP code point `c`,
   1305   * using bitwise comparison.
   1306   * @param c The code unit to search for.
   1307   * @return The offset into this of `c`, or -1 if not found.
   1308   * @stable ICU 2.0
   1309   */
   1310  inline int32_t lastIndexOf(char16_t c) const;
   1311 
   1312  /**
   1313   * Locate in this the last occurrence of the code point `c`,
   1314   * using bitwise comparison.
   1315   *
   1316   * @param c The code point to search for.
   1317   * @return The offset into this of `c`, or -1 if not found.
   1318   * @stable ICU 2.0
   1319   */
   1320  inline int32_t lastIndexOf(UChar32 c) const;
   1321 
   1322  /**
   1323   * Locate in this the last occurrence of the BMP code point `c`
   1324   * starting at offset `start`, using bitwise comparison.
   1325   * @param c The code unit to search for.
   1326   * @param start The offset at which searching will start.
   1327   * @return The offset into this of `c`, or -1 if not found.
   1328   * @stable ICU 2.0
   1329   */
   1330  inline int32_t lastIndexOf(char16_t c,
   1331              int32_t start) const;
   1332 
   1333  /**
   1334   * Locate in this the last occurrence of the code point `c`
   1335   * starting at offset `start`, using bitwise comparison.
   1336   *
   1337   * @param c The code point to search for.
   1338   * @param start The offset at which searching will start.
   1339   * @return The offset into this of `c`, or -1 if not found.
   1340   * @stable ICU 2.0
   1341   */
   1342  inline int32_t lastIndexOf(UChar32 c,
   1343              int32_t start) const;
   1344 
   1345  /**
   1346   * Locate in this the last occurrence of the BMP code point `c`
   1347   * in the range [`start`, `start + length`),
   1348   * using bitwise comparison.
   1349   * @param c The code unit to search for.
   1350   * @param start the offset into this at which to start matching
   1351   * @param length the number of characters in this to search
   1352   * @return The offset into this of `c`, or -1 if not found.
   1353   * @stable ICU 2.0
   1354   */
   1355  inline int32_t lastIndexOf(char16_t c,
   1356              int32_t start,
   1357              int32_t length) const;
   1358 
   1359  /**
   1360   * Locate in this the last occurrence of the code point `c`
   1361   * in the range [`start`, `start + length`),
   1362   * using bitwise comparison.
   1363   *
   1364   * @param c The code point to search for.
   1365   * @param start the offset into this at which to start matching
   1366   * @param length the number of characters in this to search
   1367   * @return The offset into this of `c`, or -1 if not found.
   1368   * @stable ICU 2.0
   1369   */
   1370  inline int32_t lastIndexOf(UChar32 c,
   1371              int32_t start,
   1372              int32_t length) const;
   1373 
   1374 
   1375  /* Character access */
   1376 
   1377  /**
   1378   * Return the code unit at offset `offset`.
   1379   * If the offset is not valid (0..length()-1) then U+ffff is returned.
   1380   * @param offset a valid offset into the text
   1381   * @return the code unit at offset `offset`
   1382   *         or 0xffff if the offset is not valid for this string
   1383   * @stable ICU 2.0
   1384   */
   1385  inline char16_t charAt(int32_t offset) const;
   1386 
   1387  /**
   1388   * Return the code unit at offset `offset`.
   1389   * If the offset is not valid (0..length()-1) then U+ffff is returned.
   1390   * @param offset a valid offset into the text
   1391   * @return the code unit at offset `offset`, or 0xffff if the offset is not valid for this string
   1392   * @stable ICU 2.0
   1393   */
   1394  inline char16_t operator[] (int32_t offset) const;
   1395 
   1396  /**
   1397   * Return the code point that contains the code unit
   1398   * at offset `offset`.
   1399   * If the offset is not valid (0..length()-1) then U+ffff is returned.
   1400   * @param offset a valid offset into the text
   1401   * that indicates the text offset of any of the code units
   1402   * that will be assembled into a code point (21-bit value) and returned
   1403   * @return the code point of text at `offset`
   1404   *         or 0xffff if the offset is not valid for this string
   1405   * @stable ICU 2.0
   1406   */
   1407  UChar32 char32At(int32_t offset) const;
   1408 
   1409  /**
   1410   * Adjust a random-access offset so that
   1411   * it points to the beginning of a Unicode character.
   1412   * The offset that is passed in points to
   1413   * any code unit of a code point,
   1414   * while the returned offset will point to the first code unit
   1415   * of the same code point.
   1416   * In UTF-16, if the input offset points to a second surrogate
   1417   * of a surrogate pair, then the returned offset will point
   1418   * to the first surrogate.
   1419   * @param offset a valid offset into one code point of the text
   1420   * @return offset of the first code unit of the same code point
   1421   * @see U16_SET_CP_START
   1422   * @stable ICU 2.0
   1423   */
   1424  int32_t getChar32Start(int32_t offset) const;
   1425 
   1426  /**
   1427   * Adjust a random-access offset so that
   1428   * it points behind a Unicode character.
   1429   * The offset that is passed in points behind
   1430   * any code unit of a code point,
   1431   * while the returned offset will point behind the last code unit
   1432   * of the same code point.
   1433   * In UTF-16, if the input offset points behind the first surrogate
   1434   * (i.e., to the second surrogate)
   1435   * of a surrogate pair, then the returned offset will point
   1436   * behind the second surrogate (i.e., to the code unit after the surrogate pair).
   1437   * @param offset a valid offset after any code unit of a code point of the text
   1438   * @return offset of the first code unit after the same code point
   1439   * @see U16_SET_CP_LIMIT
   1440   * @stable ICU 2.0
   1441   */
   1442  int32_t getChar32Limit(int32_t offset) const;
   1443 
   1444  /**
   1445   * Move the code unit index along the string by delta code points.
   1446   * Interpret the input index as a code unit-based offset into the string,
   1447   * move the index forward or backward by delta code points, and
   1448   * return the resulting index.
   1449   * The input index should point to the first code unit of a code point,
   1450   * if there is more than one.
   1451   *
   1452   * Both input and output indexes are code unit-based as for all
   1453   * string indexes/offsets in ICU (and other libraries, like MBCS char*).
   1454   * If delta<0 then the index is moved backward (toward the start of the string).
   1455   * If delta>0 then the index is moved forward (toward the end of the string).
   1456   *
   1457   * This behaves like CharacterIterator::move32(delta, kCurrent).
   1458   *
   1459   * Behavior for out-of-bounds indexes:
   1460   * `moveIndex32` pins the input index to 0..length(), i.e.,
   1461   * if the input index<0 then it is pinned to 0;
   1462   * if it is index>length() then it is pinned to length().
   1463   * Afterwards, the index is moved by `delta` code points
   1464   * forward or backward,
   1465   * but no further backward than to 0 and no further forward than to length().
   1466   * The resulting index return value will be in between 0 and length(), inclusively.
   1467   *
   1468   * Examples:
   1469   * \code
   1470   *     // s has code points 'a' U+10000 'b' U+10ffff U+2029
   1471   *     UnicodeString s(u"a\U00010000b\U0010ffff\u2029");
   1472   *
   1473   *     // initial index: position of U+10000
   1474   *     int32_t index=1;
   1475   *
   1476   *     // the following examples will all result in index==4, position of U+10ffff
   1477   *
   1478   *     // skip 2 code points from some position in the string
   1479   *     index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
   1480   *
   1481   *     // go to the 3rd code point from the start of s (0-based)
   1482   *     index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
   1483   *
   1484   *     // go to the next-to-last code point of s
   1485   *     index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
   1486   * \endcode
   1487   *
   1488   * @param index input code unit index
   1489   * @param delta (signed) code point count to move the index forward or backward
   1490   *        in the string
   1491   * @return the resulting code unit index
   1492   * @stable ICU 2.0
   1493   */
   1494  int32_t moveIndex32(int32_t index, int32_t delta) const;
   1495 
   1496  /* Substring extraction */
   1497 
   1498  /**
   1499   * Copy the characters in the range
   1500   * [`start`, `start + length`) into the array `dst`,
   1501   * beginning at `dstStart`.
   1502   * If the string aliases to `dst` itself as an external buffer,
   1503   * then extract() will not copy the contents.
   1504   *
   1505   * @param start offset of first character which will be copied into the array
   1506   * @param length the number of characters to extract
   1507   * @param dst array in which to copy characters.  The length of `dst`
   1508   * must be at least (`dstStart + length`).
   1509   * @param dstStart the offset in `dst` where the first character
   1510   * will be extracted
   1511   * @stable ICU 2.0
   1512   */
   1513  inline void extract(int32_t start,
   1514           int32_t length,
   1515           Char16Ptr dst,
   1516           int32_t dstStart = 0) const;
   1517 
   1518  /**
   1519   * Copy the contents of the string into dest.
   1520   * This is a convenience function that
   1521   * checks if there is enough space in dest,
   1522   * extracts the entire string if possible,
   1523   * and NUL-terminates dest if possible.
   1524   *
   1525   * If the string fits into dest but cannot be NUL-terminated
   1526   * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
   1527   * If the string itself does not fit into dest
   1528   * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
   1529   *
   1530   * If the string aliases to `dest` itself as an external buffer,
   1531   * then extract() will not copy the contents.
   1532   *
   1533   * @param dest Destination string buffer.
   1534   * @param destCapacity Number of char16_ts available at dest.
   1535   * @param errorCode ICU error code.
   1536   * @return length()
   1537   * @stable ICU 2.0
   1538   */
   1539  int32_t
   1540  extract(Char16Ptr dest, int32_t destCapacity,
   1541          UErrorCode &errorCode) const;
   1542 
   1543  /**
   1544   * Copy the characters in the range
   1545   * [`start`, `start + length`) into the  UnicodeString
   1546   * `target`.
   1547   * @param start offset of first character which will be copied
   1548   * @param length the number of characters to extract
   1549   * @param target UnicodeString into which to copy characters.
   1550   * @stable ICU 2.0
   1551   */
   1552  inline void extract(int32_t start,
   1553           int32_t length,
   1554           UnicodeString& target) const;
   1555 
   1556  /**
   1557   * Copy the characters in the range [`start`, `limit`)
   1558   * into the array `dst`, beginning at `dstStart`.
   1559   * @param start offset of first character which will be copied into the array
   1560   * @param limit offset immediately following the last character to be copied
   1561   * @param dst array in which to copy characters.  The length of `dst`
   1562   * must be at least (`dstStart + (limit - start)`).
   1563   * @param dstStart the offset in `dst` where the first character
   1564   * will be extracted
   1565   * @stable ICU 2.0
   1566   */
   1567  inline void extractBetween(int32_t start,
   1568              int32_t limit,
   1569              char16_t *dst,
   1570              int32_t dstStart = 0) const;
   1571 
   1572  /**
   1573   * Copy the characters in the range [`start`, `limit`)
   1574   * into the UnicodeString `target`.  Replaceable API.
   1575   * @param start offset of first character which will be copied
   1576   * @param limit offset immediately following the last character to be copied
   1577   * @param target UnicodeString into which to copy characters.
   1578   * @stable ICU 2.0
   1579   */
   1580  virtual void extractBetween(int32_t start,
   1581              int32_t limit,
   1582              UnicodeString& target) const override;
   1583 
   1584  /**
   1585   * Copy the characters in the range
   1586   * [`start`, `start + startLength`) into an array of characters.
   1587   * All characters must be invariant (see utypes.h).
   1588   * Use US_INV as the last, signature-distinguishing parameter.
   1589   *
   1590   * This function does not write any more than `targetCapacity`
   1591   * characters but returns the length of the entire output string
   1592   * so that one can allocate a larger buffer and call the function again
   1593   * if necessary.
   1594   * The output string is NUL-terminated if possible.
   1595   *
   1596   * @param start offset of first character which will be copied
   1597   * @param startLength the number of characters to extract
   1598   * @param target the target buffer for extraction, can be nullptr
   1599   *               if targetCapacity is 0
   1600   * @param targetCapacity the length of the target buffer
   1601   * @param inv Signature-distinguishing parameter, use US_INV.
   1602   * @return the output string length, not including the terminating NUL
   1603   * @stable ICU 3.2
   1604   */
   1605  int32_t extract(int32_t start,
   1606           int32_t startLength,
   1607           char *target,
   1608           int32_t targetCapacity,
   1609           enum EInvariant inv) const;
   1610 
   1611 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
   1612 
   1613  /**
   1614   * Copy the characters in the range
   1615   * [`start`, `start + startLength`) into an array of characters
   1616   * in the platform's default codepage.
   1617   * This function does not write any more than `targetLength`
   1618   * characters but returns the length of the entire output string
   1619   * so that one can allocate a larger buffer and call the function again
   1620   * if necessary.
   1621   * The output string is NUL-terminated if possible.
   1622   *
   1623   * @param start offset of first character which will be copied
   1624   * @param startLength the number of characters to extract
   1625   * @param target the target buffer for extraction
   1626   * @param targetLength the length of the target buffer
   1627   * If `target` is nullptr, then the number of bytes required for
   1628   * `target` is returned.
   1629   * @return the output string length, not including the terminating NUL
   1630   * @stable ICU 2.0
   1631   */
   1632  int32_t extract(int32_t start,
   1633           int32_t startLength,
   1634           char *target,
   1635           uint32_t targetLength) const;
   1636 
   1637 #endif
   1638 
   1639 #if !UCONFIG_NO_CONVERSION
   1640 
   1641  /**
   1642   * Copy the characters in the range
   1643   * [`start`, `start + startLength`) into an array of characters
   1644   * in a specified codepage.
   1645   * The output string is NUL-terminated.
   1646   *
   1647   * Recommendation: For invariant-character strings use
   1648   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
   1649   * because it avoids object code dependencies of UnicodeString on
   1650   * the conversion code.
   1651   *
   1652   * @param start offset of first character which will be copied
   1653   * @param startLength the number of characters to extract
   1654   * @param target the target buffer for extraction
   1655   * @param codepage the desired codepage for the characters.  nullptr has
   1656   * the special meaning of the default codepage
   1657   * If `codepage` is an empty string (`""`),
   1658   * then a simple conversion is performed on the codepage-invariant
   1659   * subset ("invariant characters") of the platform encoding. See utypes.h.
   1660   * If `target` is nullptr, then the number of bytes required for
   1661   * `target` is returned. It is assumed that the target is big enough
   1662   * to fit all of the characters.
   1663   * @return the output string length, not including the terminating NUL
   1664   * @stable ICU 2.0
   1665   */
   1666  inline int32_t extract(int32_t start,
   1667                         int32_t startLength,
   1668                         char* target,
   1669                         const char* codepage = nullptr) const;
   1670 
   1671  /**
   1672   * Copy the characters in the range
   1673   * [`start`, `start + length`) into an array of characters
   1674   * in a specified codepage.
   1675   * This function does not write any more than `targetLength`
   1676   * characters but returns the length of the entire output string
   1677   * so that one can allocate a larger buffer and call the function again
   1678   * if necessary.
   1679   * The output string is NUL-terminated if possible.
   1680   *
   1681   * Recommendation: For invariant-character strings use
   1682   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
   1683   * because it avoids object code dependencies of UnicodeString on
   1684   * the conversion code.
   1685   *
   1686   * @param start offset of first character which will be copied
   1687   * @param startLength the number of characters to extract
   1688   * @param target the target buffer for extraction
   1689   * @param targetLength the length of the target buffer
   1690   * @param codepage the desired codepage for the characters.  nullptr has
   1691   * the special meaning of the default codepage
   1692   * If `codepage` is an empty string (`""`),
   1693   * then a simple conversion is performed on the codepage-invariant
   1694   * subset ("invariant characters") of the platform encoding. See utypes.h.
   1695   * If `target` is nullptr, then the number of bytes required for
   1696   * `target` is returned.
   1697   * @return the output string length, not including the terminating NUL
   1698   * @stable ICU 2.0
   1699   */
   1700  int32_t extract(int32_t start,
   1701           int32_t startLength,
   1702           char *target,
   1703           uint32_t targetLength,
   1704           const char *codepage) const;
   1705 
   1706  /**
   1707   * Convert the UnicodeString into a codepage string using an existing UConverter.
   1708   * The output string is NUL-terminated if possible.
   1709   *
   1710   * This function avoids the overhead of opening and closing a converter if
   1711   * multiple strings are extracted.
   1712   *
   1713   * @param dest destination string buffer, can be nullptr if destCapacity==0
   1714   * @param destCapacity the number of chars available at dest
   1715   * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
   1716   *        or nullptr for the default converter
   1717   * @param errorCode normal ICU error code
   1718   * @return the length of the output string, not counting the terminating NUL;
   1719   *         if the length is greater than destCapacity, then the string will not fit
   1720   *         and a buffer of the indicated length would need to be passed in
   1721   * @stable ICU 2.0
   1722   */
   1723  int32_t extract(char *dest, int32_t destCapacity,
   1724                  UConverter *cnv,
   1725                  UErrorCode &errorCode) const;
   1726 
   1727 #endif
   1728 
   1729  /**
   1730   * Create a temporary substring for the specified range.
   1731   * Unlike the substring constructor and setTo() functions,
   1732   * the object returned here will be a read-only alias (using getBuffer())
   1733   * rather than copying the text.
   1734   * As a result, this substring operation is much faster but requires
   1735   * that the original string not be modified or deleted during the lifetime
   1736   * of the returned substring object.
   1737   * @param start offset of the first character visible in the substring
   1738   * @param length length of the substring
   1739   * @return a read-only alias UnicodeString object for the substring
   1740   * @stable ICU 4.4
   1741   */
   1742  UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
   1743 
   1744  /**
   1745   * Create a temporary substring for the specified range.
   1746   * Same as tempSubString(start, length) except that the substring range
   1747   * is specified as a (start, limit) pair (with an exclusive limit index)
   1748   * rather than a (start, length) pair.
   1749   * @param start offset of the first character visible in the substring
   1750   * @param limit offset immediately following the last character visible in the substring
   1751   * @return a read-only alias UnicodeString object for the substring
   1752   * @stable ICU 4.4
   1753   */
   1754  inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
   1755 
   1756  /**
   1757   * Convert the UnicodeString to UTF-8 and write the result
   1758   * to a ByteSink. This is called by toUTF8String().
   1759   * Unpaired surrogates are replaced with U+FFFD.
   1760   * Calls u_strToUTF8WithSub().
   1761   *
   1762   * @param sink A ByteSink to which the UTF-8 version of the string is written.
   1763   *             sink.Flush() is called at the end.
   1764   * @stable ICU 4.2
   1765   * @see toUTF8String
   1766   */
   1767  void toUTF8(ByteSink &sink) const;
   1768 
   1769  /**
   1770   * Convert the UnicodeString to UTF-8 and append the result
   1771   * to a standard string.
   1772   * Unpaired surrogates are replaced with U+FFFD.
   1773   * Calls toUTF8().
   1774   *
   1775   * @tparam StringClass A std::string or a std::u8string (or a compatible type)
   1776   * @param result A std::string or a std::u8string (or a compatible object)
   1777   *        to which the UTF-8 version of the string is appended.
   1778   * @return The string object.
   1779   * @stable ICU 4.2
   1780   * @see toUTF8
   1781   */
   1782  template<typename StringClass>
   1783  StringClass &toUTF8String(StringClass &result) const {
   1784    StringByteSink<StringClass> utf8Sink(&result, this->length());  // sink targets the caller's string
   1785    this->toUTF8(utf8Sink);  // writes the UTF-8 form of this string to the sink
   1786    return result;
   1787  }
   1788 
   1789 #ifndef U_HIDE_DRAFT_API
   1790  /**
   1791   * Convert the UnicodeString to a UTF-8 string.
   1792   * Unpaired surrogates are replaced with U+FFFD.
   1793   * Calls toUTF8().
   1794   *
   1795   * @tparam StringClass A std::string or a std::u8string (or a compatible type)
   1796   * @return A std::string or a std::u8string (or a compatible object)
   1797   *        with the UTF-8 version of the string.
   1798   * @draft ICU 78
   1799   * @see toUTF8
   1800   */
   1801  template<typename StringClass>
   1802  StringClass toUTF8String() const {
   1803    StringClass utf8;  // starts empty; returned by value
   1804    StringByteSink<StringClass> utf8Sink(&utf8, this->length());
   1805    this->toUTF8(utf8Sink);  // writes the UTF-8 form of this string to the sink
   1806    return utf8;
   1807  }
   1808 #endif  // U_HIDE_DRAFT_API
   1809 
   1810  /**
   1811   * Convert the UnicodeString to UTF-32.
   1812   * Unpaired surrogates are replaced with U+FFFD.
   1813   * Calls u_strToUTF32WithSub().
   1814   *
   1815   * @param utf32 destination string buffer, can be nullptr if capacity==0
   1816   * @param capacity the number of UChar32s available at utf32
   1817   * @param errorCode Standard ICU error code. Its input value must
   1818   *                  pass the U_SUCCESS() test, or else the function returns
   1819   *                  immediately. Check for U_FAILURE() on output or use with
   1820   *                  function chaining. (See User Guide for details.)
   1821   * @return The length of the UTF-32 string.
   1822   * @see fromUTF32
   1823   * @stable ICU 4.2
   1824   */
   1825  int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
   1826 
   1827  /* Length operations */
   1828 
   1829  /**
   1830   * Return the length of the UnicodeString object.
   1831   * The length is the number of char16_t code units in the UnicodeString.
   1832   * If you want the number of code points, please use countChar32().
   1833   * @return the length of the UnicodeString object
   1834   * @see countChar32
   1835   * @stable ICU 2.0
   1836   */
   1837  inline int32_t length() const;
   1838 
   1839  /**
   1840   * Count Unicode code points in the length char16_t code units of the string.
   1841   * A code point may occupy either one or two char16_t code units.
   1842   * Counting code points involves reading all code units.
   1843   *
   1844   * This function is basically the inverse of moveIndex32().
   1845   *
   1846   * @param start the index of the first code unit to check
   1847   * @param length the number of char16_t code units to check
   1848   * @return the number of code points in the specified code units
   1849   * @see length
   1850   * @stable ICU 2.0
   1851   */
   1852  int32_t
   1853  countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
   1854 
   1855  /**
   1856   * Check if the length char16_t code units of the string
   1857   * contain more Unicode code points than a certain number.
   1858   * This is more efficient than counting all code points in this part of the string
   1859   * and comparing that number with a threshold.
   1860   * This function may not need to scan the string at all if the length
   1861   * falls within a certain range, and
   1862   * never needs to count more than 'number+1' code points.
   1863   * Logically equivalent to (countChar32(start, length)>number).
   1864   * A Unicode code point may occupy either one or two char16_t code units.
   1865   *
   1866   * @param start the index of the first code unit to check (0 for the entire string)
   1867   * @param length the number of char16_t code units to check
   1868   *               (use INT32_MAX for the entire string; remember that start/length
   1869   *                values are pinned)
   1870   * @param number The number of code points in the (sub)string is compared against
   1871   *               the 'number' parameter.
   1872   * @return Boolean value for whether the string contains more Unicode code points
   1873   *         than 'number'. Same as (u_countChar32(s, length)>number).
   1874   * @see countChar32
   1875   * @see u_strHasMoreChar32Than
   1876   * @stable ICU 2.4
   1877   */
   1878  UBool
   1879  hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
   1880 
   1881  /**
   1882   * Determine if this string is empty.
   1883   * @return true if this string contains 0 characters, false otherwise.
   1884   * @stable ICU 2.0
   1885   */
   1886  inline UBool isEmpty() const;
   1887 
   1888  /**
   1889   * Return the capacity of the internal buffer of the UnicodeString object.
   1890   * This is useful together with the getBuffer functions.
   1891   * See there for details.
   1892   *
   1893   * @return the number of char16_ts available in the internal buffer
   1894   * @see getBuffer
   1895   * @stable ICU 2.0
   1896   */
   1897  inline int32_t getCapacity() const;
   1898 
   1899  /* Other operations */
   1900 
   1901  /**
   1902   * Generate a hash code for this object.
   1903   * @return The hash code of this UnicodeString.
   1904   * @stable ICU 2.0
   1905   */
   1906  inline int32_t hashCode() const;
   1907 
   1908  /**
   1909   * Determine if this object contains a valid string.
   1910   * A bogus string has no value. It is different from an empty string,
   1911   * although in both cases isEmpty() returns true and length() returns 0.
   1912   * setToBogus() and isBogus() can be used to indicate that no string value is available.
   1913   * For a bogus string, getBuffer() and getTerminatedBuffer() return nullptr, and
   1914   * length() returns 0.
   1915   *
   1916   * @return true if the string is bogus/invalid, false otherwise
   1917   * @see setToBogus()
   1918   * @stable ICU 2.0
   1919   */
   1920  inline UBool isBogus() const;
   1921 
   1922 #ifndef U_HIDE_DRAFT_API
   1923 private:
   1924  // These type aliases are private; there is no guarantee that they will remain
   1925  // aliases to the same types in subsequent versions of ICU.
   1926  // Note that whether `std::u16string_view::const_iterator` is a pointer or a
   1927  // class that models contiguous_iterator is platform-dependent.
   1928  using unspecified_iterator = std::u16string_view::const_iterator;
   1929  using unspecified_reverse_iterator = std::u16string_view::const_reverse_iterator;
   1930 
   1931 public:
   1932  /**
   1933   * @return an iterator to the first code unit in this string.
   1934   *     The iterator may be a pointer or a contiguous-iterator object.
   1935   * @draft ICU 78
   1936   */
   1937  unspecified_iterator begin() const { return static_cast<std::u16string_view>(*this).begin(); }
   1938  /**
   1939   * @return an iterator to just past the last code unit in this string.
   1940   *     The iterator may be a pointer or a contiguous-iterator object.
   1941   * @draft ICU 78
   1942   */
   1943  unspecified_iterator end() const { return static_cast<std::u16string_view>(*this).end(); }
   1944  /**
   1945   * @return a reverse iterator to the last code unit in this string.
   1946   *     The iterator may be a pointer or a contiguous-iterator object.
   1947   * @draft ICU 78
   1948   */
   1949  unspecified_reverse_iterator rbegin() const { return static_cast<std::u16string_view>(*this).rbegin(); }
   1950  /**
   1951   * @return a reverse iterator to just before the first code unit in this string.
   1952   *     The iterator may be a pointer or a contiguous-iterator object.
   1953   * @draft ICU 78
   1954   */
   1955  unspecified_reverse_iterator rend() const { return static_cast<std::u16string_view>(*this).rend(); }
   1956 #endif  // U_HIDE_DRAFT_API
   1957 
   1958  //========================================
   1959  // Write operations
   1960  //========================================
   1961 
   1962  /* Assignment operations */
   1963 
   1964  /**
   1965   * Assignment operator.  Replace the characters in this UnicodeString
   1966   * with the characters from `srcText`.
   1967   *
   1968   * Starting with ICU 2.4, the assignment operator and the copy constructor
   1969   * allocate a new buffer and copy the buffer contents even for readonly aliases.
   1970   * By contrast, the fastCopyFrom() function implements the old,
   1971   * more efficient but less safe behavior
   1972   * of making this string also a readonly alias to the same buffer.
   1973   *
   1974   * If the source object has an "open" buffer from getBuffer(minCapacity),
   1975   * then the copy is an empty string.
   1976   *
   1977   * @param srcText The text containing the characters to replace
   1978   * @return a reference to this
   1979   * @stable ICU 2.0
   1980   * @see fastCopyFrom
   1981   */
   1982  UnicodeString &operator=(const UnicodeString &srcText);
   1983 
   1984  /**
   1985   * Almost the same as the assignment operator.
   1986   * Replace the characters in this UnicodeString
   1987   * with the characters from `srcText`.
   1988   *
   1989   * This function works the same as the assignment operator
   1990   * for all strings except for ones that are readonly aliases.
   1991   *
   1992   * Starting with ICU 2.4, the assignment operator and the copy constructor
   1993   * allocate a new buffer and copy the buffer contents even for readonly aliases.
   1994   * This function implements the old, more efficient but less safe behavior
   1995   * of making this string also a readonly alias to the same buffer.
   1996   *
   1997   * The fastCopyFrom function must be used only if it is known that the lifetime of
   1998   * this UnicodeString does not exceed the lifetime of the aliased buffer
   1999   * including its contents, for example for strings from resource bundles
   2000   * or aliases to string constants.
   2001   *
   2002   * If the source object has an "open" buffer from getBuffer(minCapacity),
   2003   * then the copy is an empty string.
   2004   *
   2005   * @param src The text containing the characters to replace.
   2006   * @return a reference to this
   2007   * @stable ICU 2.4
   2008   */
   2009  UnicodeString &fastCopyFrom(const UnicodeString &src);
   2010 
   2011  /**
   2012   * Assignment operator. Replaces the characters in this UnicodeString
   2013   * with a copy of the characters from the `src`
   2014   * which is, or which is implicitly convertible to,
   2015   * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
   2016   *
   2017   * @param src The string view containing the characters to copy.
   2018   * @return a reference to this
   2019   * @stable ICU 76
   2020   */
   2021  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
   2022  inline UnicodeString &operator=(const S &src) {
   2023    this->unBogus();  // must run before length() is read below
   2024    return this->doReplace(0, this->length(), internal::toU16StringView(src));  // replace [0, length()) with src
   2025  }
   2026 
   2027  /**
   2028   * Move assignment operator; might leave src in bogus state.
   2029   * This string will have the same contents and state that the source string had.
   2030   * The behavior is undefined if *this and src are the same object.
   2031   * @param src source string
   2032   * @return *this
   2033   * @stable ICU 56
   2034   */
   2035  UnicodeString &operator=(UnicodeString &&src) noexcept;
   2036 
   2037  /**
   2038   * Swap the contents and state of this string with those of `other`.
   2039   * @param other the string to exchange contents and state with
   2040   * @stable ICU 56
   2041   */
   2042  void swap(UnicodeString &other) noexcept;
   2043 
   2044  /**
   2045   * Non-member UnicodeString swap function; forwards to `s1.swap(s2)`.
   2046   * @param s1 will get s2's contents and state
   2047   * @param s2 will get s1's contents and state
   2048   * @stable ICU 56
   2049   */
   2050  friend inline void U_EXPORT2
   2051  swap(UnicodeString &s1, UnicodeString &s2) noexcept {
   2052    s1.swap(s2);
   2053  }
   2054 
   2055  /**
   2056   * Assignment operator.  Replace the characters in this UnicodeString
   2057   * with the code unit `ch`.
   2058   * @param ch the code unit that becomes the new contents of this string
   2059   * @return a reference to this
   2060   * @stable ICU 2.0
   2061   */
   2062  inline UnicodeString& operator= (char16_t ch);
   2063 
   2064  /**
   2065   * Assignment operator.  Replace the characters in this UnicodeString
   2066   * with the code point `ch`.
   2067   * @param ch the code point that becomes the new contents of this string
   2068   * @return a reference to this
   2069   * @stable ICU 2.0
   2070   */
   2071  inline UnicodeString& operator= (UChar32 ch);
   2072 
   2073  /**
   2074   * Set the text in the UnicodeString object to the characters
   2075   * in `srcText` in the range
   2076   * [`srcStart`, `srcText.length()`).
   2077   * `srcText` is not modified.
   2078   * @param srcText the source for the new characters
   2079   * @param srcStart the offset into `srcText` where new characters
   2080   * will be obtained
   2081   * @return a reference to this
   2082   * @stable ICU 2.2
   2083   */
   2084  inline UnicodeString& setTo(const UnicodeString& srcText,
   2085               int32_t srcStart);
   2086 
   2087  /**
   2088   * Set the text in the UnicodeString object to the characters
   2089   * in `srcText` in the range
   2090   * [`srcStart`, `srcStart + srcLength`).
   2091   * `srcText` is not modified.
   2092   * @param srcText the source for the new characters
   2093   * @param srcStart the offset into `srcText` where new characters
   2094   * will be obtained
   2095   * @param srcLength the number of characters in `srcText` in the
   2096   * replace string.
   2097   * @return a reference to this
   2098   * @stable ICU 2.0
   2099   */
   2100  inline UnicodeString& setTo(const UnicodeString& srcText,
   2101               int32_t srcStart,
   2102               int32_t srcLength);
   2103 
   2104  /**
   2105   * Set the text in the UnicodeString object to the characters in
   2106   * `srcText`.
   2107   * `srcText` is not modified.
   2108   * @param srcText the source for the new characters
   2109   * @return a reference to this
   2110   * @stable ICU 2.0
   2111   */
   2112  inline UnicodeString& setTo(const UnicodeString& srcText);
   2113 
   2114  /**
   2115   * Set the characters in the UnicodeString object to the characters
   2116   * in `srcChars`. `srcChars` is not modified.
   2117   * @param srcChars the source for the new characters
   2118   * @param srcLength the number of Unicode characters in srcChars.
   2119   * @return a reference to this
   2120   * @stable ICU 2.0
   2121   */
   2122  inline UnicodeString& setTo(const char16_t *srcChars,
   2123               int32_t srcLength);
   2124 
   2125  /**
   2126   * Set the characters in the UnicodeString object to the code unit
   2127   * `srcChar`.
   2128   * @param srcChar the code unit which becomes the UnicodeString's character
   2129   * content
   2130   * @return a reference to this
   2131   * @stable ICU 2.0
   2132   */
   2133  inline UnicodeString& setTo(char16_t srcChar);
   2134 
   2135  /**
   2136   * Set the characters in the UnicodeString object to the code point
   2137   * `srcChar`.
   2138   * @param srcChar the code point which becomes the UnicodeString's character
   2139   * content
   2140   * @return a reference to this
   2141   * @stable ICU 2.0
   2142   */
   2143  inline UnicodeString& setTo(UChar32 srcChar);
   2144 
   2145  /**
   2146   * Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
   2147   * The text will be used for the UnicodeString object, but
   2148   * it will not be released when the UnicodeString is destroyed.
   2149   * This has copy-on-write semantics:
   2150   * When the string is modified, then the buffer is first copied into
   2151   * newly allocated memory.
   2152   * The aliased buffer is never modified.
   2153   *
   2154   * In an assignment to another UnicodeString, when using the copy constructor
   2155   * or the assignment operator, the text will be copied.
   2156   * When using fastCopyFrom(), the text will be aliased again,
   2157   * so that both strings then alias the same readonly-text.
   2158   *
   2159   * @param isTerminated specifies if `text` is `NUL`-terminated.
   2160   *                     This must be true if `textLength==-1`.
   2161   * @param text The characters to alias for the UnicodeString.
   2162   * @param textLength The number of Unicode characters in `text` to alias.
   2163   *                   If -1, then this function will determine the length
   2164   *                   by calling `u_strlen()`.
   2165   * @return a reference to this
   2166   * @stable ICU 2.0
   2167   */
   2168  UnicodeString &setTo(UBool isTerminated,
   2169                       ConstChar16Ptr text,
   2170                       int32_t textLength);
   2171 
   2172  /**
   2173   * Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
   2174   * The text will be used for the UnicodeString object, but
   2175   * it will not be released when the UnicodeString is destroyed.
   2176   * This has write-through semantics:
   2177   * For as long as the capacity of the buffer is sufficient, write operations
   2178   * will directly affect the buffer. When more capacity is necessary, then
   2179   * a new buffer will be allocated and the contents copied as with regularly
   2180   * constructed strings.
   2181   * In an assignment to another UnicodeString, the buffer will be copied.
   2182   * The extract(Char16Ptr dst) function detects whether the dst pointer is the same
   2183   * as the string buffer itself and will in this case not copy the contents.
   2184   *
   2185   * @param buffer The characters to alias for the UnicodeString.
   2186   * @param buffLength The number of Unicode characters in `buffer` to alias.
   2187   * @param buffCapacity The size of `buffer` in char16_ts.
   2188   * @return a reference to this
   2189   * @stable ICU 2.0
   2190   */
   2191  UnicodeString &setTo(char16_t *buffer,
   2192                       int32_t buffLength,
   2193                       int32_t buffCapacity);
   2194 
   2195  /**
   2196   * Make this UnicodeString object invalid.
   2197   * The string will test true with isBogus().
   2198   *
   2199   * A bogus string has no value. It is different from an empty string.
   2200   * It can be used to indicate that no string value is available.
   2201   * getBuffer() and getTerminatedBuffer() return nullptr, and
   2202   * length() returns 0.
   2203   *
   2204   * This utility function is used throughout the UnicodeString
   2205   * implementation to indicate that a UnicodeString operation failed,
   2206   * and may be used in other functions,
   2207   * especially but not exclusively when such functions do not
   2208   * take a UErrorCode for simplicity.
   2209   *
   2210   * The following methods, and no others, will clear a string object's bogus flag:
   2211   * - remove()
   2212   * - remove(0, INT32_MAX)
   2213   * - truncate(0)
   2214   * - operator=() (assignment operator)
   2215   * - setTo(...)
   2216   *
   2217   * The simplest way to turn a bogus string into an empty one
   2218   * is to use the remove() function.
   2219   * Examples of other calls that are equivalent to "set to empty string":
   2220   * \code
   2221   * if(s.isBogus()) {
   2222   *   s.remove();           // set to an empty string (remove all), or
   2223   *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or
   2224   *   s.truncate(0);        // set to an empty string (complete truncation), or
   2225   *   s=UnicodeString();    // assign an empty string, or
   2226   *   s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
   2227   *   s.setTo(u"", 0);      // set to an empty C Unicode string
   2228   * }
   2229   * \endcode
   2230   *
   2231   * @see isBogus()
   2232   * @stable ICU 2.0
   2233   */
   2234  void setToBogus();
   2235 
   2236  /**
   2237   * Set the character at the specified offset to the specified character.
   2238   * @param offset A valid offset into the text of the character to set
   2239   * @param ch The new character
   2240   * @return A reference to this
   2241   * @stable ICU 2.0
   2242   */
   2243  UnicodeString& setCharAt(int32_t offset,
   2244               char16_t ch);
   2245 
   2246 
   2247  /* Append operations */
   2248 
   2249  /**
   2250   * Append operator. Append the code unit `ch` to the UnicodeString
   2251   * object.
   2252   * @param ch the code unit to be appended
   2253   * @return a reference to this
   2254   * @stable ICU 2.0
   2255   */
   2256 inline  UnicodeString& operator+= (char16_t ch);
   2257 
   2258  /**
   2259   * Append operator. Append the code point `ch` to the UnicodeString
   2260   * object.
   2261   * @param ch the code point to be appended
   2262   * @return a reference to this
   2263   * @stable ICU 2.0
   2264   */
   2265 inline  UnicodeString& operator+= (UChar32 ch);
   2266 
   2267  /**
   2268   * Append operator. Append the characters in `srcText` to the
   2269   * UnicodeString object. `srcText` is not modified.
   2270   * @param srcText the source for the new characters
   2271   * @return a reference to this
   2272   * @stable ICU 2.0
   2273   */
   2274  inline UnicodeString& operator+= (const UnicodeString& srcText);
   2275 
   2276  /**
   2277   * Append operator. Appends the characters in `src`
   2278   * which is, or which is implicitly convertible to,
   2279   * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view,
   2280   * to the UnicodeString object. Same effect as append(src).
   2281   *
   2282   * @param src the source for the new characters
   2283   * @return a reference to this
   2284   * @stable ICU 76
   2285   */
   2286  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
   2287  inline UnicodeString& operator+=(const S &src) {
   2288    return doAppend(internal::toU16StringView(src));
   2289  }
   2290 
   2291  /**
   2292   * Append the characters
   2293   * in `srcText` in the range
   2294   * [`srcStart`, `srcStart + srcLength`) to the
   2295   * end of the UnicodeString object. `srcText`
   2296   * is not modified.
   2297   * @param srcText the source for the new characters
   2298   * @param srcStart the offset into `srcText` where new characters
   2299   * will be obtained
   2300   * @param srcLength the number of characters in `srcText` in
   2301   * the append string
   2302   * @return a reference to this
   2303   * @stable ICU 2.0
   2304   */
   2305  inline UnicodeString& append(const UnicodeString& srcText,
   2306            int32_t srcStart,
   2307            int32_t srcLength);
   2308 
   2309  /**
   2310   * Append the characters in `srcText` to the UnicodeString object.
   2311   * `srcText` is not modified.
   2312   * @param srcText the source for the new characters
   2313   * @return a reference to this
   2314   * @stable ICU 2.0
   2315   */
   2316  inline UnicodeString& append(const UnicodeString& srcText);
   2317 
   2318  /**
   2319   * Append the characters in `srcChars` in the range
   2320   * [`srcStart`, `srcStart + srcLength`) to the end of the
   2321   * UnicodeString object.
   2322   * `srcChars` is not modified.
   2323   * @param srcChars the source for the new characters
   2324   * @param srcStart the offset into `srcChars` where new characters
   2325   * will be obtained
   2326   * @param srcLength the number of characters in `srcChars` in
   2327   *                  the append string; can be -1 if `srcChars` is NUL-terminated
   2328   * @return a reference to this
   2329   * @stable ICU 2.0
   2330   */
   2331  inline UnicodeString& append(const char16_t *srcChars,
   2332            int32_t srcStart,
   2333            int32_t srcLength);
   2334 
   2335  /**
   2336   * Append the characters in `srcChars` to the UnicodeString object.
   2337   * `srcChars` is not modified.
   2338   * @param srcChars the source for the new characters
   2339   * @param srcLength the number of Unicode characters in `srcChars`;
   2340   *                  can be -1 if `srcChars` is NUL-terminated
   2341   * @return a reference to this
   2342   * @stable ICU 2.0
   2343   */
   2344  inline UnicodeString& append(ConstChar16Ptr srcChars,
   2345            int32_t srcLength);
   2346 
   2347  /**
   2348   * Appends the characters in `src`
   2349   * which is, or which is implicitly convertible to,
   2350   * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view,
   2351   * to the UnicodeString object. Same effect as operator+=(src).
   2352   *
   2353   * @param src the source for the new characters
   2354   * @return a reference to this
   2355   * @stable ICU 76
   2356   */
   2357  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
   2358  inline UnicodeString& append(const S &src) {
   2359    return doAppend(internal::toU16StringView(src));
   2360  }
   2361 
   2362  /**
   2363   * Append the code unit `srcChar` to the UnicodeString object.
   2364   * @param srcChar the code unit to append
   2365   * @return a reference to this
   2366   * @stable ICU 2.0
   2367   */
   2368  inline UnicodeString& append(char16_t srcChar);
   2369 
   2370  /**
   2371   * Append the code point `srcChar` to the UnicodeString object.
   2372   * @param srcChar the code point to append
   2373   * @return a reference to this
   2374   * @stable ICU 2.0
   2375   */
   2376  UnicodeString& append(UChar32 srcChar);
   2377 
   2378 #ifndef U_HIDE_DRAFT_API
   2379  /**
   2380   * Appends the code unit `c` to the end of the UnicodeString object.
   2381   * Same as append(c) except that it returns nothing instead of *this.
   2382   *
   2383   * @param c the code unit to append
   2384   * @draft ICU 78
   2385   */
   2386  inline void push_back(char16_t c) { append(c); }
   2387 #endif  // U_HIDE_DRAFT_API
   2388 
   2389  /* Insert operations */
   2390 
   2391  /**
   2392   * Insert the characters in `srcText` in the range
   2393   * [`srcStart`, `srcStart + srcLength`) into the UnicodeString
   2394   * object at offset `start`. `srcText` is not modified.
   2395   * @param start the offset where the insertion begins
   2396   * @param srcText the source for the new characters
   2397   * @param srcStart the offset into `srcText` where new characters
   2398   * will be obtained
   2399   * @param srcLength the number of characters in `srcText` in
   2400   * the insert string
   2401   * @return a reference to this
   2402   * @stable ICU 2.0
   2403   */
   2404  inline UnicodeString& insert(int32_t start,
   2405            const UnicodeString& srcText,
   2406            int32_t srcStart,
   2407            int32_t srcLength);
   2408 
   2409  /**
   2410   * Insert the characters in `srcText` into the UnicodeString object
   2411   * at offset `start`. `srcText` is not modified.
   2412   * @param start the offset where the insertion begins
   2413   * @param srcText the source for the new characters
   2414   * @return a reference to this
   2415   * @stable ICU 2.0
   2416   */
   2417  inline UnicodeString& insert(int32_t start,
   2418            const UnicodeString& srcText);
   2419 
   2420  /**
   2421   * Insert the characters in `srcChars` in the range
   2422   * [`srcStart`, `srcStart + srcLength`) into the UnicodeString
   2423   *  object at offset `start`. `srcChars` is not modified.
   2424   * @param start the offset at which the insertion begins
   2425   * @param srcChars the source for the new characters
   2426   * @param srcStart the offset into `srcChars` where new characters
   2427   * will be obtained
   2428   * @param srcLength the number of characters in `srcChars`
   2429   * in the insert string
   2430   * @return a reference to this
   2431   * @stable ICU 2.0
   2432   */
   2433  inline UnicodeString& insert(int32_t start,
   2434            const char16_t *srcChars,
   2435            int32_t srcStart,
   2436            int32_t srcLength);
   2437 
   2438  /**
   2439   * Insert the characters in `srcChars` into the UnicodeString object
   2440   * at offset `start`. `srcChars` is not modified.
   2441   * @param start the offset where the insertion begins
   2442   * @param srcChars the source for the new characters
   2443   * @param srcLength the number of Unicode characters in srcChars.
   2444   * @return a reference to this
   2445   * @stable ICU 2.0
   2446   */
   2447  inline UnicodeString& insert(int32_t start,
   2448            ConstChar16Ptr srcChars,
   2449            int32_t srcLength);
   2450 
   2451  /**
   2452   * Insert the code unit `srcChar` into the UnicodeString object at
   2453   * offset `start`.
   2454   * @param start the offset at which the insertion occurs
   2455   * @param srcChar the code unit to insert
   2456   * @return a reference to this
   2457   * @stable ICU 2.0
   2458   */
   2459  inline UnicodeString& insert(int32_t start,
   2460            char16_t srcChar);
   2461 
   2462  /**
   2463   * Insert the code point `srcChar` into the UnicodeString object at
   2464   * offset `start`.
   2465   * @param start the offset at which the insertion occurs
   2466   * @param srcChar the code point to insert
   2467   * @return a reference to this
   2468   * @stable ICU 2.0
   2469   */
   2470  inline UnicodeString& insert(int32_t start,
   2471            UChar32 srcChar);
   2472 
   2473 
   2474  /* Replace operations */
   2475 
   2476  /**
   2477   * Replace the characters in the range
   2478   * [`start`, `start + length`) with the characters in
   2479   * `srcText` in the range
   2480   * [`srcStart`, `srcStart + srcLength`).
   2481   * `srcText` is not modified.
   2482   * @param start the offset at which the replace operation begins
   2483   * @param length the number of characters to replace. The character at
   2484   * `start + length` is not modified.
   2485   * @param srcText the source for the new characters
   2486   * @param srcStart the offset into `srcText` where new characters
   2487   * will be obtained
   2488   * @param srcLength the number of characters in `srcText` in
   2489   * the replace string
   2490   * @return a reference to this
   2491   * @stable ICU 2.0
   2492   */
   2493  inline UnicodeString& replace(int32_t start,
   2494             int32_t length,
   2495             const UnicodeString& srcText,
   2496             int32_t srcStart,
   2497             int32_t srcLength);
   2498 
   2499  /**
   2500   * Replace the characters in the range
   2501   * [`start`, `start + length`)
   2502   * with the characters in `srcText`.  `srcText` is
   2503   *  not modified.
   2504   * @param start the offset at which the replace operation begins
   2505   * @param length the number of characters to replace. The character at
   2506   * `start + length` is not modified.
   2507   * @param srcText the source for the new characters
   2508   * @return a reference to this
   2509   * @stable ICU 2.0
   2510   */
   2511  inline UnicodeString& replace(int32_t start,
   2512             int32_t length,
   2513             const UnicodeString& srcText);
   2514 
   2515  /**
   2516   * Replace the characters in the range
   2517   * [`start`, `start + length`) with the characters in
   2518   * `srcChars` in the range
   2519   * [`srcStart`, `srcStart + srcLength`). `srcChars`
   2520   * is not modified.
   2521   * @param start the offset at which the replace operation begins
   2522   * @param length the number of characters to replace.  The character at
   2523   * `start + length` is not modified.
   2524   * @param srcChars the source for the new characters
   2525   * @param srcStart the offset into `srcChars` where new characters
   2526   * will be obtained
   2527   * @param srcLength the number of characters in `srcChars`
   2528   * in the replace string
   2529   * @return a reference to this
   2530   * @stable ICU 2.0
   2531   */
   2532  inline UnicodeString& replace(int32_t start,
   2533             int32_t length,
   2534             const char16_t *srcChars,
   2535             int32_t srcStart,
   2536             int32_t srcLength);
   2537 
   2538  /**
   2539   * Replace the characters in the range
   2540   * [`start`, `start + length`) with the characters in
   2541   * `srcChars`.  `srcChars` is not modified.
   2542   * @param start the offset at which the replace operation begins
   2543   * @param length the number of characters to replace.  The character at
   2544   * `start + length` is not modified.
   2545   * @param srcChars the source for the new characters
   2546   * @param srcLength the number of Unicode characters in srcChars
   2547   * @return a reference to this
   2548   * @stable ICU 2.0
   2549   */
   2550  inline UnicodeString& replace(int32_t start,
   2551             int32_t length,
   2552             ConstChar16Ptr srcChars,
   2553             int32_t srcLength);
   2554 
   2555  /**
   2556   * Replace the characters in the range
   2557   * [`start`, `start + length`) with the code unit
   2558   * `srcChar`.
   2559   * @param start the offset at which the replace operation begins
   2560   * @param length the number of characters to replace.  The character at
   2561   * `start + length` is not modified.
   2562   * @param srcChar the new code unit
   2563   * @return a reference to this
   2564   * @stable ICU 2.0
   2565   */
   2566  inline UnicodeString& replace(int32_t start,
   2567             int32_t length,
   2568             char16_t srcChar);
   2569 
   2570  /**
   2571   * Replace the characters in the range
   2572   * [`start`, `start + length`) with the code point
   2573   * `srcChar`.
   2574   * @param start the offset at which the replace operation begins
   2575   * @param length the number of characters to replace.  The character at
   2576   * `start + length` is not modified.
   2577   * @param srcChar the new code point
   2578   * @return a reference to this
   2579   * @stable ICU 2.0
   2580   */
   2581  UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
   2582 
   2583  /**
   2584   * Replace the characters in the range [`start`, `limit`)
   2585   * with the characters in `srcText`. `srcText` is not modified.
   2586   * @param start the offset at which the replace operation begins
   2587   * @param limit the offset immediately following the replace range
   2588   * @param srcText the source for the new characters
   2589   * @return a reference to this
   2590   * @stable ICU 2.0
   2591   */
   2592  inline UnicodeString& replaceBetween(int32_t start,
   2593                int32_t limit,
   2594                const UnicodeString& srcText);
   2595 
   2596  /**
   2597   * Replace the characters in the range [`start`, `limit`)
   2598   * with the characters in `srcText` in the range
   2599   * [`srcStart`, `srcLimit`). `srcText` is not modified.
   2600   * @param start the offset at which the replace operation begins
   2601   * @param limit the offset immediately following the replace range
   2602   * @param srcText the source for the new characters
   2603   * @param srcStart the offset into `srcText` where new characters
   2604   * will be obtained
   2605   * @param srcLimit the offset immediately following the range to copy
   2606   * in `srcText`
   2607   * @return a reference to this
   2608   * @stable ICU 2.0
   2609   */
   2610  inline UnicodeString& replaceBetween(int32_t start,
   2611                int32_t limit,
   2612                const UnicodeString& srcText,
   2613                int32_t srcStart,
   2614                int32_t srcLimit);
   2615 
   2616  /**
   2617   * Replace a substring of this object with the given text.
   2618   * @param start the beginning index, inclusive; `0 <= start <= limit`.
   2619   * @param limit the ending index, exclusive; `start <= limit <= length()`.
   2620   * @param text the text to replace characters `start` to `limit - 1`
   2621   * @stable ICU 2.0
   2622   */
   2623  virtual void handleReplaceBetween(int32_t start,
   2624                                    int32_t limit,
   2625                                    const UnicodeString& text) override;
   2626 
   2627  /**
   2628   * Replaceable API
   2629   * @return true if it has MetaData
   2630   * @stable ICU 2.4
   2631   */
   2632  virtual UBool hasMetaData() const override;
   2633 
   2634  /**
   2635   * Copy a substring of this object, retaining attribute (out-of-band)
   2636   * information.  This method is used to duplicate or reorder substrings.
   2637   * The destination index must not overlap the source range.
   2638   *
   2639   * @param start the beginning index, inclusive; `0 <= start <= limit`.
   2640   * @param limit the ending index, exclusive; `start <= limit <= length()`.
   2641   * @param dest the destination index.  The characters from
   2642   *             `start..limit-1` will be copied to `dest`.
   2643   * Implementations of this method may assume that `dest <= start ||
   2644   * dest >= limit`.
   2645   * @stable ICU 2.0
   2646   */
   2647  virtual void copy(int32_t start, int32_t limit, int32_t dest) override;
   2648 
   2649  /* Search and replace operations */
   2650 
   2651  /**
   2652   * Replace all occurrences of characters in oldText with the characters
   2653   * in newText
   2654   * @param oldText the text containing the search text
   2655   * @param newText the text containing the replacement text
   2656   * @return a reference to this
   2657   * @stable ICU 2.0
   2658   */
   2659  inline UnicodeString& findAndReplace(const UnicodeString& oldText,
   2660                const UnicodeString& newText);
   2661 
   2662  /**
   2663   * Replace all occurrences of characters in oldText with characters
   2664   * in newText
   2665   * in the range [`start`, `start + length`).
   2666   * @param start the start of the range in which replace will be performed
   2667   * @param length the length of the range in which replace will be performed
   2668   * @param oldText the text containing the search text
   2669   * @param newText the text containing the replacement text
   2670   * @return a reference to this
   2671   * @stable ICU 2.0
   2672   */
   2673  inline UnicodeString& findAndReplace(int32_t start,
   2674                int32_t length,
   2675                const UnicodeString& oldText,
   2676                const UnicodeString& newText);
   2677 
   2678  /**
   2679   * Replace all occurrences of characters in oldText in the range
   2680   * [`oldStart`, `oldStart + oldLength`) with the characters
   2681   * in newText in the range
   2682   * [`newStart`, `newStart + newLength`)
   2683   * in the range [`start`, `start + length`).
   2684   * @param start the start of the range in which replace will be performed
   2685   * @param length the length of the range in which replace will be performed
   2686   * @param oldText the text containing the search text
   2687   * @param oldStart the start of the search range in `oldText`
   2688   * @param oldLength the length of the search range in `oldText`
   2689   * @param newText the text containing the replacement text
   2690   * @param newStart the start of the replacement range in `newText`
   2691   * @param newLength the length of the replacement range in `newText`
   2692   * @return a reference to this
   2693   * @stable ICU 2.0
   2694   */
   2695  UnicodeString& findAndReplace(int32_t start,
   2696                int32_t length,
   2697                const UnicodeString& oldText,
   2698                int32_t oldStart,
   2699                int32_t oldLength,
   2700                const UnicodeString& newText,
   2701                int32_t newStart,
   2702                int32_t newLength);
   2703 
   2704 
   2705  /* Remove operations */
   2706 
   2707  /**
   2708   * Removes all characters from the UnicodeString object and clears the bogus flag.
   2709   * This is the UnicodeString equivalent of std::string’s clear().
   2710   *
   2711   * @return a reference to this
   2712   * @see setToBogus
   2713   * @stable ICU 2.0
   2714   */
   2715  inline UnicodeString& remove();
   2716 
   2717  /**
   2718   * Remove the characters in the range
   2719   * [`start`, `start + length`) from the UnicodeString object.
   2720   * @param start the offset of the first character to remove
   2721   * @param length the number of characters to remove
   2722   * @return a reference to this
   2723   * @stable ICU 2.0
   2724   */
   2725  inline UnicodeString& remove(int32_t start,
   2726                               int32_t length = static_cast<int32_t>(INT32_MAX));
   2727 
   2728  /**
   2729   * Remove the characters in the range
   2730   * [`start`, `limit`) from the UnicodeString object.
   2731   * @param start the offset of the first character to remove
   2732   * @param limit the offset immediately following the range to remove
   2733   * @return a reference to this
   2734   * @stable ICU 2.0
   2735   */
   2736  inline UnicodeString& removeBetween(int32_t start,
   2737                                      int32_t limit = static_cast<int32_t>(INT32_MAX));
   2738 
   2739  /**
   2740   * Retain only the characters in the range
   2741   * [`start`, `limit`) from the UnicodeString object.
   2742   * Removes characters before `start` and at and after `limit`.
   2743   * @param start the offset of the first character to retain
   2744   * @param limit the offset immediately following the range to retain
   2745   * @return a reference to this
   2746   * @stable ICU 4.4
   2747   */
   2748  inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
   2749 
   2750  /* Length operations */
   2751 
   2752  /**
   2753   * Pad the start of this UnicodeString with the character `padChar`.
   2754   * If the length of this UnicodeString is less than targetLength,
   2755   * targetLength - length() copies of padChar will be added to the
   2756   * beginning of this UnicodeString.
   2757   * @param targetLength the desired length of the string
   2758   * @param padChar the character to use for padding. Defaults to
   2759   * space (U+0020)
   2760   * @return true if the text was padded, false otherwise.
   2761   * @stable ICU 2.0
   2762   */
   2763  UBool padLeading(int32_t targetLength,
   2764                    char16_t padChar = 0x0020);
   2765 
   2766  /**
   2767   * Pad the end of this UnicodeString with the character `padChar`.
   2768   * If the length of this UnicodeString is less than targetLength,
   2769   * targetLength - length() copies of padChar will be added to the
   2770   * end of this UnicodeString.
   2771   * @param targetLength the desired length of the string
   2772   * @param padChar the character to use for padding. Defaults to
   2773   * space (U+0020)
   2774   * @return true if the text was padded, false otherwise.
   2775   * @stable ICU 2.0
   2776   */
   2777  UBool padTrailing(int32_t targetLength,
   2778                     char16_t padChar = 0x0020);
   2779 
   2780  /**
   2781   * Truncate this UnicodeString to the `targetLength`.
   2782   * @param targetLength the desired length of this UnicodeString.
   2783   * @return true if the text was truncated, false otherwise
   2784   * @stable ICU 2.0
   2785   */
   2786  inline UBool truncate(int32_t targetLength);
   2787 
   2788  /**
   2789   * Trims leading and trailing whitespace from this UnicodeString.
   2790   * @return a reference to this
   2791   * @stable ICU 2.0
   2792   */
   2793  UnicodeString& trim();
   2794 
   2795  /* Miscellaneous operations */
   2796 
   2797  /**
   2798   * Reverse this UnicodeString in place.
   2799   * @return a reference to this
   2800   * @stable ICU 2.0
   2801   */
   2802  inline UnicodeString& reverse();
   2803 
   2804  /**
   2805   * Reverse the range [`start`, `start + length`) in
   2806   * this UnicodeString.
   2807   * @param start the start of the range to reverse
   2808   * @param length the number of characters to reverse
   2809   * @return a reference to this
   2810   * @stable ICU 2.0
   2811   */
   2812  inline UnicodeString& reverse(int32_t start,
   2813             int32_t length);
   2814 
   2815  /**
   2816   * Convert the characters in this to UPPER CASE following the conventions of
   2817   * the default locale.
   2818   * @return A reference to this.
   2819   * @stable ICU 2.0
   2820   */
   2821  UnicodeString& toUpper();
   2822 
   2823  /**
   2824   * Convert the characters in this to UPPER CASE following the conventions of
   2825   * a specific locale.
   2826   * @param locale The locale containing the conventions to use.
   2827   * @return A reference to this.
   2828   * @stable ICU 2.0
   2829   */
   2830  UnicodeString& toUpper(const Locale& locale);
   2831 
   2832  /**
   2833   * Convert the characters in this to lower case following the conventions of
   2834   * the default locale.
   2835   * @return A reference to this.
   2836   * @stable ICU 2.0
   2837   */
   2838  UnicodeString& toLower();
   2839 
   2840  /**
   2841   * Convert the characters in this to lower case following the conventions of
   2842   * a specific locale.
   2843   * @param locale The locale containing the conventions to use.
   2844   * @return A reference to this.
   2845   * @stable ICU 2.0
   2846   */
   2847  UnicodeString& toLower(const Locale& locale);
   2848 
   2849 #if !UCONFIG_NO_BREAK_ITERATION
   2850 
   2851  /**
   2852   * Titlecase this string, convenience function using the default locale.
   2853   *
   2854   * Casing is locale-dependent and context-sensitive.
   2855   * Titlecasing uses a break iterator to find the first characters of words
   2856   * that are to be titlecased. It titlecases those characters and lowercases
   2857   * all others.
   2858   *
   2859   * The titlecase break iterator can be provided to customize for arbitrary
   2860   * styles, using rules and dictionaries beyond the standard iterators.
   2861   * It may be more efficient to always provide an iterator to avoid
   2862   * opening and closing one for each string.
   2863   * If the break iterator passed in is null, the default Unicode algorithm
   2864   * will be used to determine the titlecase positions.
   2865   *
   2866   * This function uses only the setText(), first() and next() methods of the
   2867   * provided break iterator.
   2868   *
   2869   * @param titleIter A break iterator to find the first characters of words
   2870   *                  that are to be titlecased.
   2871   *                  If none is provided (0), then a standard titlecase
   2872   *                  break iterator is opened.
   2873   *                  Otherwise the provided iterator is set to the string's text.
   2874   * @return A reference to this.
   2875   * @stable ICU 2.1
   2876   */
   2877  UnicodeString &toTitle(BreakIterator *titleIter);
   2878 
   2879  /**
   2880   * Titlecase this string.
   2881   *
   2882   * Casing is locale-dependent and context-sensitive.
   2883   * Titlecasing uses a break iterator to find the first characters of words
   2884   * that are to be titlecased. It titlecases those characters and lowercases
   2885   * all others.
   2886   *
   2887   * The titlecase break iterator can be provided to customize for arbitrary
   2888   * styles, using rules and dictionaries beyond the standard iterators.
   2889   * It may be more efficient to always provide an iterator to avoid
   2890   * opening and closing one for each string.
   2891   * If the break iterator passed in is null, the default Unicode algorithm
   2892   * will be used to determine the titlecase positions.
   2893   *
   2894   * This function uses only the setText(), first() and next() methods of the
   2895   * provided break iterator.
   2896   *
   2897   * @param titleIter A break iterator to find the first characters of words
   2898   *                  that are to be titlecased.
   2899   *                  If none is provided (0), then a standard titlecase
   2900   *                  break iterator is opened.
   2901   *                  Otherwise the provided iterator is set to the string's text.
   2902   * @param locale    The locale to consider.
   2903   * @return A reference to this.
   2904   * @stable ICU 2.1
   2905   */
   2906  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
   2907 
   2908  /**
   2909   * Titlecase this string, with options.
   2910   *
   2911   * Casing is locale-dependent and context-sensitive.
   2912   * Titlecasing uses a break iterator to find the first characters of words
   2913   * that are to be titlecased. It titlecases those characters and lowercases
   2914   * all others. (This can be modified with options.)
   2915   *
   2916   * The titlecase break iterator can be provided to customize for arbitrary
   2917   * styles, using rules and dictionaries beyond the standard iterators.
   2918   * It may be more efficient to always provide an iterator to avoid
   2919   * opening and closing one for each string.
   2920   * If the break iterator passed in is null, the default Unicode algorithm
   2921   * will be used to determine the titlecase positions.
   2922   *
   2923   * This function uses only the setText(), first() and next() methods of the
   2924   * provided break iterator.
   2925   *
   2926   * @param titleIter A break iterator to find the first characters of words
   2927   *                  that are to be titlecased.
   2928   *                  If none is provided (0), then a standard titlecase
   2929   *                  break iterator is opened.
   2930   *                  Otherwise the provided iterator is set to the string's text.
   2931   * @param locale    The locale to consider.
   2932   * @param options   Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE,
   2933   *                  U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
   2934   *                  U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
   2935   * @return A reference to this.
   2936   * @stable ICU 3.8
   2937   */
   2938  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
   2939 
   2940 #endif
   2941 
   2942  /**
   2943   * Case-folds the characters in this string.
   2944   *
   2945   * Case-folding is locale-independent and not context-sensitive,
   2946   * but there is an option for whether to include or exclude mappings for dotted I
   2947   * and dotless i that are marked with 'T' in CaseFolding.txt.
   2948   *
   2949   * The result may be longer or shorter than the original.
   2950   *
   2951   * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
   2952   * @return A reference to this.
   2953   * @stable ICU 2.0
   2954   */
   2955  UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
   2956 
   2957  //========================================
   2958  // Access to the internal buffer
   2959  //========================================
   2960 
   2961  /**
   2962   * Get a read/write pointer to the internal buffer.
   2963   * The buffer is guaranteed to be large enough for at least minCapacity char16_ts,
   2964   * writable, and is still owned by the UnicodeString object.
   2965   * Calls to getBuffer(minCapacity) must not be nested, and
   2966   * must be matched with calls to releaseBuffer(newLength).
   2967   * If the string buffer was read-only or shared,
   2968   * then it will be reallocated and copied.
   2969   *
   2970   * An attempted nested call will return 0, and will not further modify the
   2971   * state of the UnicodeString object.
   2972   * It also returns 0 if the string is bogus.
   2973   *
   2974   * The actual capacity of the string buffer may be larger than minCapacity.
   2975   * getCapacity() returns the actual capacity.
   2976   * For many operations, the full capacity should be used to avoid reallocations.
   2977   *
   2978   * While the buffer is "open" between getBuffer(minCapacity)
   2979   * and releaseBuffer(newLength), the following applies:
   2980   * - The string length is set to 0.
   2981   * - Any read API call on the UnicodeString object will behave like on a 0-length string.
   2982   * - Any write API call on the UnicodeString object is disallowed and will have no effect.
   2983   * - You can read from and write to the returned buffer.
   2984   * - The previous string contents will still be in the buffer;
   2985   *   if you want to use it, then you need to call length() before getBuffer(minCapacity).
   2986   *   If the length() was greater than minCapacity, then any contents after minCapacity
   2987   *   may be lost.
   2988   *   The buffer contents is not NUL-terminated by getBuffer().
   2989   *   If length() < getCapacity() then you can terminate it by writing a NUL
   2990   *   at index length().
   2991   * - You must call releaseBuffer(newLength) in order to
   2992   *   return to normal UnicodeString operation.
   2993   *
   2994   * @param minCapacity the minimum number of char16_ts that are to be available
   2995   *        in the buffer, starting at the returned pointer;
   2996   *        default to the current string capacity if minCapacity==-1
   2997   * @return a writable pointer to the internal string buffer,
   2998   *         or nullptr if an error occurs (nested calls, out of memory)
   2999   *
   3000   * @see releaseBuffer
   3001   * @see getTerminatedBuffer()
   3002   * @stable ICU 2.0
   3003   */
   3004  char16_t *getBuffer(int32_t minCapacity);
   3005 
   3006  /**
   3007   * Release a read/write buffer on a UnicodeString object with an
   3008   * "open" getBuffer(minCapacity).
   3009   * This function must be called in a matched pair with getBuffer(minCapacity).
   3010   * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
   3011   *
   3012   * It will set the string length to newLength, at most to the current capacity.
   3013   * If newLength==-1 then it will set the length according to the
   3014   * first NUL in the buffer, or to the capacity if there is no NUL.
   3015   *
   3016   * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
   3017   *
   3018   * @param newLength the new length of the UnicodeString object;
   3019   *        defaults to the current capacity if newLength is greater than that;
   3020   *        if newLength==-1, it defaults to u_strlen(buffer) but not more than
   3021   *        the current capacity of the string
   3022   *
   3023   * @see getBuffer(int32_t minCapacity)
   3024   * @stable ICU 2.0
   3025   */
   3026  void releaseBuffer(int32_t newLength=-1);
   3027 
   3028  /**
   3029   * Get a read-only pointer to the internal buffer.
   3030   * This can be called at any time on a valid UnicodeString.
   3031   *
   3032   * It returns 0 if the string is bogus, or
   3033   * during an "open" getBuffer(minCapacity).
   3034   *
   3035   * It can be called as many times as desired.
   3036   * The pointer that it returns will remain valid until the UnicodeString object is modified,
   3037   * at which time the pointer is semantically invalidated and must not be used any more.
   3038   *
   3039   * The capacity of the buffer can be determined with getCapacity().
   3040   * The part after length() may or may not be initialized and valid,
   3041   * depending on the history of the UnicodeString object.
   3042   *
   3043   * The buffer contents is (probably) not NUL-terminated.
   3044   * You can check if it is with
   3045   * `(s.length() < s.getCapacity() && buffer[s.length()]==0)`.
   3046   * (See getTerminatedBuffer().)
   3047   *
   3048   * The buffer may reside in read-only memory. Its contents must not
   3049   * be modified.
   3050   *
   3051   * @return a read-only pointer to the internal string buffer,
   3052   *         or nullptr if the string is empty or bogus
   3053   *
   3054   * @see getBuffer(int32_t minCapacity)
   3055   * @see getTerminatedBuffer()
   3056   * @stable ICU 2.0
   3057   */
   3058  inline const char16_t *getBuffer() const;
   3059 
   3060  /**
   3061   * Get a read-only pointer to the internal buffer,
   3062   * making sure that it is NUL-terminated.
   3063   * This can be called at any time on a valid UnicodeString.
   3064   *
   3065   * It returns 0 if the string is bogus, or
   3066   * during an "open" getBuffer(minCapacity), or if the buffer cannot
   3067   * be NUL-terminated (because memory allocation failed).
   3068   *
   3069   * It can be called as many times as desired.
   3070   * The pointer that it returns will remain valid until the UnicodeString object is modified,
   3071   * at which time the pointer is semantically invalidated and must not be used any more.
   3072   *
   3073   * The capacity of the buffer can be determined with getCapacity().
   3074   * The part after length()+1 may or may not be initialized and valid,
   3075   * depending on the history of the UnicodeString object.
   3076   *
   3077   * The buffer contents is guaranteed to be NUL-terminated.
   3078   * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
   3079   * is written.
   3080   * For this reason, this function is not const, unlike getBuffer().
   3081   * Note that a UnicodeString may also contain NUL characters as part of its contents.
   3082   *
   3083   * The buffer may reside in read-only memory. Its contents must not
   3084   * be modified.
   3085   *
   3086   * @return a read-only pointer to the internal string buffer,
   3087   *         or 0 if the string is empty or bogus
   3088   *
   3089   * @see getBuffer(int32_t minCapacity)
   3090   * @see getBuffer()
   3091   * @stable ICU 2.2
   3092   */
   3093  const char16_t *getTerminatedBuffer();
   3094 
   3095  /**
   3096   * Conversion operator yielding a std::u16string_view of this string.
   3097   * The view is built from getBuffer() and length(); it does not copy the text.
   3098   * @return a string view of the contents of this string
   3099   * @stable ICU 76
   3100   */
   3101  inline operator std::u16string_view() const {
   3102    return std::u16string_view(getBuffer(), static_cast<std::u16string_view::size_type>(length()));
   3103  }
   3104 
   3105 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
   3106  /**
   3107   * Converts to a std::wstring_view.
   3108   *
   3109   * Note: This should remain draft until C++ standard plans
   3110   * about char16_t vs. wchar_t become clearer.
   3111   *
   3112   * @return a string view of the contents of this string
   3113   * @stable ICU 76
   3114   */
   3115  inline operator std::wstring_view() const {
   3116    const char16_t *p = getBuffer();  // reinterpreted as wchar_t below; guarded by U_SIZEOF_WCHAR_T==2
   3117 #ifdef U_ALIASING_BARRIER
   3118    U_ALIASING_BARRIER(p);
   3119 #endif
   3120    return { reinterpret_cast<const wchar_t *>(p), static_cast<std::wstring_view::size_type>(length()) };
   3121  }
   3122 #endif  // U_SIZEOF_WCHAR_T
   3123 
   3124  //========================================
   3125  // Constructors
   3126  //========================================
   3127 
   3128  /** Construct an empty UnicodeString.
   3129   * @stable ICU 2.0
   3130   */
   3131  inline UnicodeString();
   3132 
   3133  /**
   3134   * Construct a UnicodeString with capacity to hold `capacity` char16_ts
   3135   * @param capacity the number of char16_ts this UnicodeString should hold
   3136   * before a resize is necessary; if count is greater than 0 and count
   3137   * code points c take up more space than capacity, then capacity is adjusted
   3138   * accordingly.
   3139   * @param c is used to initially fill the string
   3140   * @param count specifies how many code points c are to be written in the
   3141   *              string
   3142   * @stable ICU 2.0
   3143   */
   3144  UnicodeString(int32_t capacity, UChar32 c, int32_t count);
   3145 
   3146  /**
   3147   * Single char16_t (code unit) constructor.
   3148   *
   3149   * It is recommended to mark this constructor "explicit" by
   3150   * `-DUNISTR_FROM_CHAR_EXPLICIT=explicit`
   3151   * on the compiler command line or similar.
   3152   * @param ch the character to place in the UnicodeString
   3153   * @stable ICU 2.0
   3154   */
   3155  UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch);
   3156 
   3157  /**
   3158   * Single UChar32 (code point) constructor.
   3159   *
   3160   * It is recommended to mark this constructor "explicit" by
   3161   * `-DUNISTR_FROM_CHAR_EXPLICIT=explicit`
   3162   * on the compiler command line or similar.
   3163   * @param ch the character to place in the UnicodeString
   3164   * @stable ICU 2.0
   3165   */
   3166  UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
   3167 
   3168 #ifdef U_HIDE_DRAFT_API
   3169  /**
   3170   * char16_t* constructor.
   3171   *
   3172   * It is recommended to mark this constructor "explicit" by
   3173   * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
   3174   * on the compiler command line or similar.
   3175   *
   3176   * Note, for string literals:
   3177   * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
   3178   * length determination:
   3179   * \code
   3180   * UnicodeString str(u"literal");
   3181   * if (str == u"other literal") { ... }
   3182   * \endcode
   3183   *
   3184   * @param text The characters to place in the UnicodeString.  `text`
   3185   * must be NUL (U+0000) terminated.
   3186   * @stable ICU 2.0
   3187   */
   3188  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text) :
   3189      UnicodeString(text, -1) {}  // -1 in place of a length; text must be NUL-terminated (see @param text above)
   3190 #endif  // U_HIDE_DRAFT_API
   3191 
   3192 #if !U_CHAR16_IS_TYPEDEF && \
   3193    (defined(U_HIDE_DRAFT_API) || (defined(_LIBCPP_VERSION) && _LIBCPP_VERSION >= 180000))
   3194  /**
   3195   * uint16_t * constructor.
   3196   * Delegates to UnicodeString(const char16_t *, int32_t) with a length of -1.
   3197   *
   3198   * It is recommended to mark this constructor "explicit" by
   3199   * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
   3200   * on the compiler command line or similar.
   3201   *
   3202   * Note, for string literals:
   3203   * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
   3204   * length determination:
   3205   * \code
   3206   * UnicodeString str(u"literal");
   3207   * if (str == u"other literal") { ... }
   3208   * \endcode
   3209   *
   3210   * @param text NUL-terminated UTF-16 string
   3211   * @stable ICU 59
   3212   */
   3213  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) :
   3214      UnicodeString(ConstChar16Ptr(text), -1) {}
   3215 #endif
   3216 
   3217 #if defined(U_HIDE_DRAFT_API) && (U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN))
   3218  /**
   3219   * wchar_t * constructor.
   3220   * (Only defined if U_SIZEOF_WCHAR_T==2.)
   3221   * Delegates to UnicodeString(const char16_t *, int32_t) with a length of -1.
   3222   *
   3223   * It is recommended to mark this constructor "explicit" by
   3224   * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
   3225   * on the compiler command line or similar.
   3226   *
   3227   * Note, for string literals:
   3228   * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
   3229   * length determination:
   3230   * \code
   3231   * UnicodeString str(u"literal");
   3232   * if (str == u"other literal") { ... }
   3233   * \endcode
   3234   *
   3235   * @param text NUL-terminated UTF-16 string
   3236   * @stable ICU 59
   3237   */
   3238  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
   3239      UnicodeString(ConstChar16Ptr(text), -1) {}
   3240 #endif
   3241 
   3242  /**
   3243   * nullptr_t constructor.
   3244   * Effectively the same as the default constructor, makes an empty string object.
   3245   *
   3246   * It is recommended to mark this constructor "explicit" by
   3247   * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
   3248   * on the compiler command line or similar.
   3249   * @param text nullptr
   3250   * @stable ICU 59
   3251   */
   3252  UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
   3253 
   3254  /**
   3255   * char16_t* constructor.
   3256   *
   3257   * Note, for string literals:
   3258   * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
   3259   * length determination:
   3260   * \code
   3261   * UnicodeString str(u"literal");
   3262   * if (str == u"other literal") { ... }
   3263   * \endcode
   3264   *
   3265   * @param text The characters to place in the UnicodeString.
   3266   * @param textLength The number of Unicode characters in `text`
   3267   * to copy.
   3268   * @stable ICU 2.0
   3269   */
   3270  UnicodeString(const char16_t *text,
   3271        int32_t textLength);
   3272 
   3273 #if !U_CHAR16_IS_TYPEDEF
   3274  /**
   3275   * uint16_t * constructor.
   3276   * Delegates to UnicodeString(const char16_t *, int32_t).
   3277   *
   3278   * Note, for string literals:
   3279   * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
   3280   * length determination:
   3281   * \code
   3282   * UnicodeString str(u"literal");
   3283   * if (str == u"other literal") { ... }
   3284   * \endcode
   3285   *
   3286   * @param text UTF-16 string
   3287   * @param textLength string length
   3288   * @stable ICU 59
   3289   */
   3290  UnicodeString(const uint16_t *text, int32_t textLength) :
   3291      UnicodeString(ConstChar16Ptr(text), textLength) {}  // forwards to the (const char16_t *, int32_t) constructor via ConstChar16Ptr
   3292 #endif
   3293 
   3294 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
   3295  /**
   3296   * wchar_t * constructor.
   3297   * (Only defined if U_SIZEOF_WCHAR_T==2.)
   3298   * Delegates to UnicodeString(const char16_t *, int32_t).
   3299   *
   3300   * Note, for string literals:
   3301   * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
   3302   * length determination:
   3303   * \code
   3304   * UnicodeString str(u"literal");
   3305   * if (str == u"other literal") { ... }
   3306   * \endcode
   3307   *
   3308   * @param text UTF-16 string
   3309   * @param textLength string length
   3310   * @stable ICU 59
   3311   */
   3312  UnicodeString(const wchar_t *text, int32_t textLength) :
   3313      UnicodeString(ConstChar16Ptr(text), textLength) {}  // forwards to the (const char16_t *, int32_t) constructor via ConstChar16Ptr
   3314 #endif
   3315 
   3316  /**
   3317   * nullptr_t constructor.
   3318   * Effectively the same as the default constructor, makes an empty string object.
   3319   * @param text nullptr
   3320   * @param textLength ignored
   3321   * @stable ICU 59
   3322   */
   3323  inline UnicodeString(const std::nullptr_t text, int32_t textLength);
   3324 
   3325  /**
   3326   * Constructor from `text`
   3327   * which is, or which is implicitly convertible to,
   3328   * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
   3329   * The string is bogus if the string view is too long.
   3330   *
   3331   * If you need a UnicodeString but need not copy the string view contents,
   3332   * then you can call the UnicodeString::readOnlyAlias() function instead of this constructor.
   3333   *
   3334   * @param text UTF-16 string
   3335   * @stable ICU 76
   3336   */
   3337  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>  // enabled only when ConvertibleToU16StringView<S> holds
   3338  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const S &text) {
   3339    fUnion.fFields.fLengthAndFlags = kShortString;  // initialize length/flags first; kShortString presumably denotes an empty string in the internal buffer -- confirm
   3340    doAppend(internal::toU16StringViewNullable(text));  // append the view's contents to the freshly initialized string
   3341  }
   3342 
   3343  /**
   3344   * Readonly-aliasing char16_t* constructor.
   3345   * The text will be used for the UnicodeString object, but
   3346   * it will not be released when the UnicodeString is destroyed.
   3347   * This has copy-on-write semantics:
   3348   * When the string is modified, then the buffer is first copied into
   3349   * newly allocated memory.
   3350   * The aliased buffer is never modified.
   3351   *
   3352   * In an assignment to another UnicodeString, when using the copy constructor
   3353   * or the assignment operator, the text will be copied.
   3354   * When using fastCopyFrom(), the text will be aliased again,
   3355   * so that both strings then alias the same readonly-text.
   3356   *
   3357   * Note, for string literals:
   3358   * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
   3359   * length determination:
   3360   * \code
   3361   * UnicodeString alias = UnicodeString::readOnlyAlias(u"literal");
   3362   * if (alias == u"other literal") { ... }
   3363   * \endcode
   3364   *
   3365   * @param isTerminated specifies if `text` is `NUL`-terminated.
   3366   *                     This must be true if `textLength==-1`.
   3367   * @param text The characters to alias for the UnicodeString.
   3368   * @param textLength The number of Unicode characters in `text` to alias.
   3369   *                   If -1, then this constructor will determine the length
   3370   *                   by calling `u_strlen()`.
   3371   * @stable ICU 2.0
   3372   */
   3373  UnicodeString(UBool isTerminated,
   3374                ConstChar16Ptr text,
   3375                int32_t textLength);
   3376 
   3377  /**
   3378   * Writable-aliasing char16_t* constructor.
   3379   * The text will be used for the UnicodeString object, but
   3380   * it will not be released when the UnicodeString is destroyed.
   3381   * This has write-through semantics:
   3382   * For as long as the capacity of the buffer is sufficient, write operations
   3383   * will directly affect the buffer. When more capacity is necessary, then
   3384   * a new buffer will be allocated and the contents copied as with regularly
   3385   * constructed strings.
   3386   * In an assignment to another UnicodeString, the buffer will be copied.
   3387   * The extract(Char16Ptr dst) function detects whether the dst pointer is the same
   3388   * as the string buffer itself and will in this case not copy the contents.
   3389   *
   3390   * @param buffer The characters to alias for the UnicodeString.
   3391   * @param buffLength The number of Unicode characters in `buffer` to alias.
   3392   * @param buffCapacity The size of `buffer` in char16_ts.
   3393   * @stable ICU 2.0
   3394   */
   3395  UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
   3396 
   3397 #if !U_CHAR16_IS_TYPEDEF
   3398  /**
   3399   * Writable-aliasing uint16_t * constructor.
   3400   * Delegates to UnicodeString(char16_t *, int32_t, int32_t).
   3401   * @param buffer writable buffer of/for UTF-16 text
   3402   * @param buffLength length of the current buffer contents
   3403   * @param buffCapacity buffer capacity
   3404   * @stable ICU 59
   3405   */
   3406  UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
   3407      UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
   3408 #endif
   3409 
   3410 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
   3411  /**
   3412   * Writable-aliasing wchar_t * constructor.
   3413   * (Only defined if U_SIZEOF_WCHAR_T==2.)
   3414   * Delegates to UnicodeString(char16_t *, int32_t, int32_t).
   3415   * @param buffer writable buffer of/for UTF-16 text
   3416   * @param buffLength length of the current buffer contents
   3417   * @param buffCapacity buffer capacity
   3418   * @stable ICU 59
   3419   */
   3420  UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
   3421      UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
   3422 #endif
   3423 
   3424  /**
   3425   * Writable-aliasing nullptr_t constructor.
   3426   * Effectively the same as the default constructor, makes an empty string object.
   3427   * @param buffer nullptr
   3428   * @param buffLength ignored
   3429   * @param buffCapacity ignored
   3430   * @stable ICU 59
   3431   */
   3432  inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
   3433 
   3434 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
   3435 
   3436  /**
   3437   * char* constructor.
   3438   * Uses the default converter (and thus depends on the ICU conversion code)
   3439   * unless U_CHARSET_IS_UTF8 is set to 1.
   3440   *
   3441   * For ASCII (really "invariant character") strings it is more efficient to use
   3442   * the constructor that takes a US_INV (for its enum EInvariant).
   3443   *
   3444   * Note, for string literals:
   3445   * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
   3446   * length determination:
   3447   * \code
   3448   * UnicodeString str(u"literal");
   3449   * if (str == u"other literal") { ... }
   3450   * \endcode
   3451   *
   3452   * It is recommended to mark this constructor "explicit" by
   3453   * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
   3454   * on the compiler command line or similar.
   3455   * @param codepageData an array of bytes, null-terminated,
   3456   *                     in the platform's default codepage.
   3457   * @stable ICU 2.0
   3458   */
   3459  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
   3460 
   3461  /**
   3462   * char* constructor.
   3463   * Uses the default converter (and thus depends on the ICU conversion code)
   3464   * unless U_CHARSET_IS_UTF8 is set to 1.
   3465   * @param codepageData an array of bytes in the platform's default codepage.
   3466   * @param dataLength The number of bytes in `codepageData`.
   3467   * @stable ICU 2.0
   3468   */
   3469  UnicodeString(const char *codepageData, int32_t dataLength);
   3470 
   3471 #endif
   3472 
   3473 #if !UCONFIG_NO_CONVERSION
   3474 
   3475  /**
   3476   * char* constructor.
   3477   * @param codepageData an array of bytes, null-terminated
   3478   * @param codepage the encoding of `codepageData`.  The special
   3479   * value 0 for `codepage` indicates that the text is in the
   3480   * platform's default codepage.
   3481   *
   3482   * If `codepage` is an empty string (`""`),
   3483   * then a simple conversion is performed on the codepage-invariant
   3484   * subset ("invariant characters") of the platform encoding. See utypes.h.
   3485   * Recommendation: For invariant-character strings use the constructor
   3486   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
   3487   * because it avoids object code dependencies of UnicodeString on
   3488   * the conversion code.
   3489   *
   3490   * @stable ICU 2.0
   3491   */
   3492  UnicodeString(const char *codepageData, const char *codepage);
   3493 
   3494  /**
   3495   * char* constructor.
   3496   * @param codepageData an array of bytes.
   3497   * @param dataLength The number of bytes in `codepageData`.
   3498   * @param codepage the encoding of `codepageData`.  The special
   3499   * value 0 for `codepage` indicates that the text is in the
   3500   * platform's default codepage.
   3501   * If `codepage` is an empty string (`""`),
   3502   * then a simple conversion is performed on the codepage-invariant
   3503   * subset ("invariant characters") of the platform encoding. See utypes.h.
   3504   * Recommendation: For invariant-character strings use the constructor
   3505   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
   3506   * because it avoids object code dependencies of UnicodeString on
   3507   * the conversion code.
   3508   *
   3509   * @stable ICU 2.0
   3510   */
   3511  UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
   3512 
   3513  /**
   3514   * char * / UConverter constructor.
   3515   * This constructor uses an existing UConverter object to
   3516   * convert the codepage string to Unicode and construct a UnicodeString
   3517   * from that.
   3518   *
   3519   * The converter is reset first.
   3520   * If the error code indicates a failure before this constructor is called,
   3521   * or if an error occurs during conversion or construction,
   3522   * then the string will be bogus.
   3523   *
   3524   * This function avoids the overhead of opening and closing a converter if
   3525   * multiple strings are constructed.
   3526   *
   3527   * @param src input codepage string
   3528   * @param srcLength length of the input string, can be -1 for NUL-terminated strings
   3529   * @param cnv converter object (ucnv_resetToUnicode() will be called),
   3530   *        can be nullptr for the default converter
   3531   * @param errorCode normal ICU error code
   3532   * @stable ICU 2.0
   3533   */
   3534  UnicodeString(
   3535        const char *src, int32_t srcLength,
   3536        UConverter *cnv,
   3537        UErrorCode &errorCode);
   3538 
   3539 #endif
   3540 
   3541  /**
   3542   * Constructs a Unicode string from an invariant-character char * string.
   3543   * About invariant characters see utypes.h.
   3544   * This constructor has no runtime dependency on conversion code and is
   3545   * therefore recommended over ones taking a charset name string
   3546   * (where the empty string "" indicates invariant-character conversion).
   3547   *
   3548   * Use the macro US_INV as the third, signature-distinguishing parameter.
   3549   *
   3550   * For example:
   3551   * \code
   3552   *     void fn(const char *s) {
   3553   *       UnicodeString ustr(s, -1, US_INV);
   3554   *       // use ustr ...
   3555   *     }
   3556   * \endcode
   3557   *
   3558   * Note, for string literals:
   3559   * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
   3560   * length determination:
   3561   * \code
   3562   * UnicodeString str(u"literal");
   3563   * if (str == u"other literal") { ... }
   3564   * \endcode
   3565   *
   3566   * @param src String using only invariant characters.
   3567   * @param textLength Length of src, or -1 if NUL-terminated.
   3568   * @param inv Signature-distinguishing parameter, use US_INV.
   3569   *
   3570   * @see US_INV
   3571   * @stable ICU 3.2
   3572   */
   3573  UnicodeString(const char *src, int32_t textLength, enum EInvariant inv);
   3574 
   3575 
   3576  /**
   3577   * Copy constructor.
   3578   *
   3579   * Starting with ICU 2.4, the assignment operator and the copy constructor
   3580   * allocate a new buffer and copy the buffer contents even for readonly aliases.
   3581   * By contrast, the fastCopyFrom() function implements the old,
   3582   * more efficient but less safe behavior
   3583   * of making this string also a readonly alias to the same buffer.
   3584   *
   3585   * If the source object has an "open" buffer from getBuffer(minCapacity),
   3586   * then the copy is an empty string.
   3587   *
   3588   * @param that The UnicodeString object to copy.
   3589   * @stable ICU 2.0
   3590   * @see fastCopyFrom
   3591   */
   3592  UnicodeString(const UnicodeString& that);
   3593 
   3594  /**
   3595   * Move constructor; might leave src in bogus state.
   3596   * This string will have the same contents and state that the source string had.
   3597   * @param src source string
   3598   * @stable ICU 56
   3599   */
   3600  UnicodeString(UnicodeString &&src) noexcept;
   3601 
   3602  /**
   3603   * 'Substring' constructor from tail of source string.
   3604   * @param src The UnicodeString object to copy.
   3605   * @param srcStart The offset into `src` at which to start copying.
   3606   * @stable ICU 2.2
   3607   */
   3608  UnicodeString(const UnicodeString& src, int32_t srcStart);
   3609 
   3610  /**
   3611   * 'Substring' constructor from subrange of source string.
   3612   * @param src The UnicodeString object to copy.
   3613   * @param srcStart The offset into `src` at which to start copying.
   3614   * @param srcLength The number of characters from `src` to copy.
   3615   * @stable ICU 2.2
   3616   */
   3617  UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
   3618 
   3619  /**
   3620   * Clone this object, an instance of a subclass of Replaceable.
   3621   * Clones can be used concurrently in multiple threads.
   3622   * If a subclass does not implement clone(), or if an error occurs,
   3623   * then nullptr is returned.
   3624   * The caller must delete the clone.
   3625   *
   3626   * @return a clone of this object
   3627   *
   3628   * @see Replaceable::clone
   3629   * @see getDynamicClassID
   3630   * @stable ICU 2.6
   3631   */
   3632  virtual UnicodeString *clone() const override;
   3633 
   3634  /** Destructor.
   3635   * @stable ICU 2.0
   3636   */
   3637  virtual ~UnicodeString();
   3638 
   3639  /**
   3640   * Factory method that creates a read-only alias of existing text.
   3641   * The new string shares the buffer of the input `text`, which is,
   3642   * or is implicitly convertible to, a std::u16string_view or
   3643   * (if U_SIZEOF_WCHAR_T==2) a std::wstring_view.
   3644   * If the string view is too long, the resulting string is bogus.
   3645   *
   3646   * The UnicodeString object uses the text but does not own it:
   3647   * the text is not released when the UnicodeString is destroyed.
   3648   * Copy-on-write semantics apply: before the string is modified,
   3649   * the buffer is copied into newly allocated memory,
   3650   * so the aliased buffer itself is never modified.
   3651   *
   3652   * The copy constructor and the assignment operator copy the text
   3653   * into the other UnicodeString, whereas fastCopyFrom() aliases it
   3654   * again, so that both strings alias the same readonly text.
   3655   *
   3656   * @param text The string view to alias for the UnicodeString.
   3657   * @stable ICU 76
   3658   */
   3659  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
   3660  static inline UnicodeString readOnlyAlias(const S &text) {
   3661    // Normalize the argument to a UTF-16 view, then let the private worker build the alias.
   3662    const auto view = internal::toU16StringView(text);
   3663    return readOnlyAliasFromU16StringView(view);
   3664  }
   3665 
   3666  /**
   3667   * Readonly-aliasing factory method.
   3668   * Aliases the same buffer as the input `text`.
   3669   *
   3670   * The text will be used for the UnicodeString object, but
   3671   * it will not be released when the UnicodeString is destroyed.
   3672   * This has copy-on-write semantics:
   3673   * When the string is modified, then the buffer is first copied into
   3674   * newly allocated memory.
   3675   * The aliased buffer is never modified.
   3676   *
   3677   * In an assignment to another UnicodeString, when using the copy constructor
   3678   * or the assignment operator, the text will be copied.
   3679   * When using fastCopyFrom(), the text will be aliased again,
   3680   * so that both strings then alias the same readonly-text.
   3681   *
   3682   * @param text The UnicodeString to alias.
   3683   * @stable ICU 76
   3684   */
   3685  static inline UnicodeString readOnlyAlias(const UnicodeString &text) {
   3686    return readOnlyAliasFromUnicodeString(text);  // the aliasing itself is done by this private worker
   3687  }
   3688 
   3689  /**
   3690   * Create a UnicodeString from a UTF-8 string.
   3691   * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
   3692   * Calls u_strFromUTF8WithSub().
   3693   *
   3694   * @param utf8 UTF-8 input string.
   3695   *             Note that a StringPiece can be implicitly constructed
   3696   *             from a std::string or a NUL-terminated const char * string.
   3697   * @return A UnicodeString with equivalent UTF-16 contents.
   3698   * @see toUTF8
   3699   * @see toUTF8String
   3700   * @stable ICU 4.2
   3701   */
   3702  static UnicodeString fromUTF8(StringPiece utf8);
   3703 
   3704  /**
   3705   * Create a UnicodeString from a UTF-32 string.
   3706   * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
   3707   * Calls u_strFromUTF32WithSub().
   3708   *
   3709   * @param utf32 UTF-32 input string. Must not be nullptr.
   3710   * @param length Length of the input string, or -1 if NUL-terminated.
   3711   * @return A UnicodeString with equivalent UTF-16 contents.
   3712   * @see toUTF32
   3713   * @stable ICU 4.2
   3714   */
   3715  static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
   3716 
   3717  /* Miscellaneous operations */
   3718 
   3719  /**
   3720   * Unescape a string of characters and return a string containing
   3721   * the result.  The following escape sequences are recognized:
   3722   *
   3723   * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
   3724   * \\Uhhhhhhhh   8 hex digits
   3725   * \\xhh         1-2 hex digits
   3726   * \\ooo         1-3 octal digits; o in [0-7]
   3727   * \\cX          control-X; X is masked with 0x1F
   3728   *
   3729   * as well as the standard ANSI C escapes:
   3730   *
   3731   * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
   3732   * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
   3733   * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
   3734   *
   3735   * Anything else following a backslash is generically escaped.  For
   3736   * example, "[a\\-z]" returns "[a-z]".
   3737   *
   3738   * If an escape sequence is ill-formed, this method returns an empty
   3739   * string.  An example of an ill-formed sequence is "\\u" followed by
   3740   * fewer than 4 hex digits.
   3741   *
   3742   * This function is similar to u_unescape() but not identical to it.
   3743   * The latter takes a source char*, so it does escape recognition
   3744   * and also invariant conversion.
   3745   *
   3746   * @return a string with backslash escapes interpreted, or an
   3747   * empty string on error.
   3748   * @see UnicodeString#unescapeAt()
   3749   * @see u_unescape()
   3750   * @see u_unescapeAt()
   3751   * @stable ICU 2.0
   3752   */
   3753  UnicodeString unescape() const;
   3754 
   3755  /**
   3756   * Unescape a single escape sequence and return the represented
   3757   * character.  See unescape() for a listing of the recognized escape
   3758   * sequences.  The character at offset-1 is assumed (without
   3759   * checking) to be a backslash.  If the escape sequence is
   3760   * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
   3761   * returned.
   3762   *
   3763   * @param offset an input output parameter.  On input, it is the
   3764   * offset into this string where the escape sequence is located,
   3765   * after the initial backslash.  On output, it is advanced after the
   3766   * last character parsed.  On error, it is not advanced at all.
   3767   * @return the character represented by the escape sequence at
   3768   * offset, or U_SENTINEL=-1 on error.
   3769   * @see UnicodeString#unescape()
   3770   * @see u_unescape()
   3771   * @see u_unescapeAt()
   3772   * @stable ICU 2.0
   3773   */
   3774  UChar32 unescapeAt(int32_t &offset) const;
   3775 
   3776  /**
   3777   * ICU "poor man's RTTI", returns a UClassID for this class.
   3778   *
   3779   * @stable ICU 2.2
   3780   */
   3781  static UClassID U_EXPORT2 getStaticClassID();
   3782 
   3783  /**
   3784   * ICU "poor man's RTTI", returns a UClassID for the actual class.
   3785   *
   3786   * @stable ICU 2.2
   3787   */
   3788  virtual UClassID getDynamicClassID() const override;
   3789 
   3790  //========================================
   3791  // Implementation methods
   3792  //========================================
   3793 
   3794 protected:
   3795  /**
   3796   * Implement Replaceable::getLength() (see jitterbug 1027).
   3797   * @stable ICU 2.4
   3798   */
   3799  virtual int32_t getLength() const override;
   3800 
   3801  /**
   3802   * The change in Replaceable to use virtual getCharAt() allows
   3803   * UnicodeString::charAt() to be inline again (see jitterbug 709).
   3804   * @stable ICU 2.4
   3805   */
   3806  virtual char16_t getCharAt(int32_t offset) const override;
   3807 
   3808  /**
   3809   * The change in Replaceable to use virtual getChar32At() allows
   3810   * UnicodeString::char32At() to be inline again (see jitterbug 709).
   3811   * @stable ICU 2.4
   3812   */
   3813  virtual UChar32 getChar32At(int32_t offset) const override;
   3814 
   3815 private:
   3816  static UnicodeString readOnlyAliasFromU16StringView(std::u16string_view text);  // worker for readOnlyAlias(const S &)
   3817  static UnicodeString readOnlyAliasFromUnicodeString(const UnicodeString &text); // worker for readOnlyAlias(const UnicodeString &)
   3818 
   3819  // For char* constructors. Could be made public.
   3820  UnicodeString &setToUTF8(StringPiece utf8);
   3821  // For extract(char*).
   3822  // We could make a toUTF8(target, capacity, errorCode) public but not
   3823  // this version: New API will be cleaner if we make callers create substrings
   3824  // rather than having start+length on every method,
   3825  // and it should take a UErrorCode&.
   3826  int32_t
   3827  toUTF8(int32_t start, int32_t len,
   3828         char *target, int32_t capacity) const;
   3829 
   3830  /**
   3831   * Contents comparison behind operator==. Requires: this & text not bogus, same lengths.
   3832   */
   3833  inline UBool doEquals(const UnicodeString &text, int32_t len) const {
   3834    const char16_t *otherChars = text.getArrayStart();  // start of text's characters
   3835    return doEquals(otherChars, len);                    // the char16_t* overload does the work
   3836  }
   3837  UBool doEquals(const char16_t *text, int32_t len) const;
   3838 
   3839  inline UBool
   3840  doEqualsSubstring(int32_t start,
   3841           int32_t length,
   3842           const UnicodeString& srcText,
   3843           int32_t srcStart,
   3844           int32_t srcLength) const;
   3845 
   3846  UBool doEqualsSubstring(int32_t start,
   3847           int32_t length,
   3848           const char16_t *srcChars,
   3849           int32_t srcStart,
   3850           int32_t srcLength) const;
   3851 
   3852  inline int8_t
   3853  doCompare(int32_t start,
   3854           int32_t length,
   3855           const UnicodeString& srcText,
   3856           int32_t srcStart,
   3857           int32_t srcLength) const;
   3858 
   3859  int8_t doCompare(int32_t start,
   3860           int32_t length,
   3861           const char16_t *srcChars,
   3862           int32_t srcStart,
   3863           int32_t srcLength) const;
   3864 
   3865  inline int8_t
   3866  doCompareCodePointOrder(int32_t start,
   3867                          int32_t length,
   3868                          const UnicodeString& srcText,
   3869                          int32_t srcStart,
   3870                          int32_t srcLength) const;
   3871 
   3872  int8_t doCompareCodePointOrder(int32_t start,
   3873                                 int32_t length,
   3874                                 const char16_t *srcChars,
   3875                                 int32_t srcStart,
   3876                                 int32_t srcLength) const;
   3877 
   3878  inline int8_t
   3879  doCaseCompare(int32_t start,
   3880                int32_t length,
   3881                const UnicodeString &srcText,
   3882                int32_t srcStart,
   3883                int32_t srcLength,
   3884                uint32_t options) const;
   3885 
   3886  int8_t
   3887  doCaseCompare(int32_t start,
   3888                int32_t length,
   3889                const char16_t *srcChars,
   3890                int32_t srcStart,
   3891                int32_t srcLength,
   3892                uint32_t options) const;
   3893 
   3894  int32_t doIndexOf(char16_t c,
   3895            int32_t start,
   3896            int32_t length) const;
   3897 
   3898  int32_t doIndexOf(UChar32 c,
   3899                        int32_t start,
   3900                        int32_t length) const;
   3901 
   3902  int32_t doLastIndexOf(char16_t c,
   3903                int32_t start,
   3904                int32_t length) const;
   3905 
   3906  int32_t doLastIndexOf(UChar32 c,
   3907                            int32_t start,
   3908                            int32_t length) const;
   3909 
   3910  void doExtract(int32_t start,
   3911         int32_t length,
   3912         char16_t *dst,
   3913         int32_t dstStart) const;
   3914 
   3915  inline void doExtract(int32_t start,
   3916         int32_t length,
   3917         UnicodeString& target) const;
   3918 
   3919  inline char16_t doCharAt(int32_t offset)  const;
   3920 
   3921  UnicodeString& doReplace(int32_t start,
   3922               int32_t length,
   3923               const UnicodeString& srcText,
   3924               int32_t srcStart,
   3925               int32_t srcLength);
   3926 
   3927  UnicodeString& doReplace(int32_t start,
   3928               int32_t length,
   3929               const char16_t *srcChars,
   3930               int32_t srcStart,
   3931               int32_t srcLength);
   3932  UnicodeString& doReplace(int32_t start, int32_t length, std::u16string_view src);
   3933 
   3934  UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
   3935  UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
   3936  UnicodeString& doAppend(std::u16string_view src);
   3937 
   3938  UnicodeString& doReverse(int32_t start,
   3939               int32_t length);
   3940 
   3941  // calculate hash code
   3942  int32_t doHashCode() const;
   3943 
   3944  // get pointer to start of array
   3945  // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
   3946  inline char16_t* getArrayStart();
   3947  inline const char16_t* getArrayStart() const;
   3948 
   3949  inline UBool hasShortLength() const;
   3950  inline int32_t getShortLength() const;
   3951 
   3952  // A UnicodeString object (not necessarily its current buffer)
   3953  // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
   3954  inline UBool isWritable() const;
   3955 
   3956  // Is the current buffer writable?
   3957  inline UBool isBufferWritable() const;
   3958 
   3959  // None of the following does releaseArray().
   3960  inline void setZeroLength();
   3961  inline void setShortLength(int32_t len);
   3962  inline void setLength(int32_t len);
   3963  inline void setToEmpty();
   3964  inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags
   3965 
   3966  // allocate the array; result may be the stack buffer
   3967  // sets refCount to 1 if appropriate
   3968  // sets fArray, fCapacity, and flags
   3969  // sets length to 0
   3970  // returns boolean for success or failure
   3971  UBool allocate(int32_t capacity);
   3972 
   3973  // release the array if owned
   3974  void releaseArray();
   3975 
   3976  // turn a bogus string into an empty one
   3977  void unBogus();
   3978 
   3979  // implements assignment operator, copy constructor, and fastCopyFrom()
   3980  UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=false);
   3981 
   3982  // Copies just the fields without memory management.
   3983  void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) noexcept;
   3984 
   3985  // Pin start and limit to acceptable values.
   3986  inline void pinIndex(int32_t& start) const;
   3987  inline void pinIndices(int32_t& start,
   3988                         int32_t& length) const;
   3989 
   3990 #if !UCONFIG_NO_CONVERSION
   3991 
   3992  /* Internal extract() using UConverter. */
   3993  int32_t doExtract(int32_t start, int32_t length,
   3994                    char *dest, int32_t destCapacity,
   3995                    UConverter *cnv,
   3996                    UErrorCode &errorCode) const;
   3997 
   3998  /*
   3999   * Real constructor for converting from codepage data.
   4000   * It assumes that it is called with !fRefCounted.
   4001   *
   4002   * If `codepage==0`, then the default converter
   4003   * is used for the platform encoding.
   4004   * If `codepage` is an empty string (`""`),
   4005   * then a simple conversion is performed on the codepage-invariant
   4006   * subset ("invariant characters") of the platform encoding. See utypes.h.
   4007   */
   4008  void doCodepageCreate(const char *codepageData,
   4009                        int32_t dataLength,
   4010                        const char *codepage);
   4011 
   4012  /*
   4013   * Worker function for creating a UnicodeString from
   4014   * a codepage string using a UConverter.
   4015   */
   4016  void
   4017  doCodepageCreate(const char *codepageData,
   4018                   int32_t dataLength,
   4019                   UConverter *converter,
   4020                   UErrorCode &status);
   4021 
   4022 #endif
   4023 
   4024  /*
   4025   * This function is called when write access to the array
   4026   * is necessary.
   4027   *
   4028   * We need to make a copy of the array if
   4029   * the buffer is read-only, or
   4030   * the buffer is refCounted (shared), and refCount>1, or
   4031   * the buffer is too small.
   4032   *
   4033   * Return false if memory could not be allocated.
   4034   */
   4035  UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
   4036                           int32_t growCapacity = -1,
   4037                           UBool doCopyArray = true,
   4038                           int32_t** pBufferToDelete = nullptr,
   4039                           UBool forceClone = false);
   4040 
   4041  /**
   4042   * Common function for UnicodeString case mappings.
   4043   * The stringCaseMapper has the same type UStringCaseMapper
   4044   * as in ustr_imp.h for ustrcase_map().
   4045   */
   4046  UnicodeString &
   4047  caseMap(int32_t caseLocale, uint32_t options,
   4048 #if !UCONFIG_NO_BREAK_ITERATION
   4049          BreakIterator *iter,
   4050 #endif
   4051          UStringCaseMapper *stringCaseMapper);
   4052 
   4053  // ref counting
   4054  void addRef();
   4055  int32_t removeRef();
   4056  int32_t refCount() const;
   4057 
   4058  // constants
   4059  enum {
   4060    /**
   4061     * Size of stack buffer for short strings: UNISTR_OBJECT_SIZE minus one pointer and 2 bytes, in U_SIZEOF_UCHAR units.
   4062     * Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
   4063     * @see UNISTR_OBJECT_SIZE
   4064     */
   4065    US_STACKBUF_SIZE = static_cast<int32_t>(UNISTR_OBJECT_SIZE - sizeof(void*) - 2) / U_SIZEOF_UCHAR,
   4066    kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
   4067    kInvalidHashCode=0, // invalid hash code
   4068    kEmptyHashCode=1, // hash code for empty string
   4069 
   4070    // bit flag values for fLengthAndFlags
   4071    kIsBogus=1,         // this string is bogus, i.e., not valid or nullptr
   4072    kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
   4073    kRefCounted=4,      // there is a refCount field before the characters in fArray
   4074    kBufferIsReadonly=8,// do not write to this buffer
   4075    kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
   4076                        // and releaseBuffer(newLength) must be called
   4077    kAllStorageFlags=0x1f, // mask covering the five flag bits above (1|2|4|8|16)
   4078 
   4079    kLengthShift=5,     // remaining 11 bits for non-negative short length, or negative if long
   4080    kLength1=1<<kLengthShift, // fLengthAndFlags value of a short length of 1 (lowest length bit)
   4081    kMaxShortLength=0x3ff,  // max non-negative short length (leaves top bit 0)
   4082    kLengthIsLarge=0xffe0,  // short length < 0, real length is in fUnion.fFields.fLength
   4083 
   4084    // combined values for convenience
   4085    kShortString=kUsingStackBuffer,   // contents live in fUnion.fStackFields
   4086    kLongString=kRefCounted,          // fArray is preceded by a refCount field
   4087    kReadonlyAlias=kBufferIsReadonly, // aliased buffer that must not be written to
   4088    kWritableAlias=0                  // none of the storage flags is set
   4089  };
   4090 
   4091  friend class UnicodeStringAppendable;
   4092 
   4093  union StackBufferOrFields;        // forward declaration necessary before friend declaration
   4094  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
   4095 
   4096  /*
   4097   * The following are all the class fields that are stored
   4098   * in each UnicodeString object.
   4099   * Note that UnicodeString has virtual functions,
   4100   * therefore there is an implicit vtable pointer
   4101   * as the first real field.
   4102   * The fields should be aligned such that no padding is necessary.
   4103   * On 32-bit machines, the size should be 32 bytes,
   4104   * on 64-bit machines (8-byte pointers), it should be 40 bytes.
   4105   *
   4106   * We use a hack to achieve this.
   4107   *
   4108   * With at least some compilers, each of the following is forced to
   4109   * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
   4110   * rounded up with additional padding if the fields do not already fit that requirement:
   4111   * - sizeof(class UnicodeString)
   4112   * - offsetof(UnicodeString, fUnion)
   4113   * - sizeof(fUnion)
   4114   * - sizeof(fStackFields)
   4115   *
   4116   * We optimize for the longest possible internal buffer for short strings.
   4117   * fUnion.fStackFields begins with 2 bytes for storage flags
   4118   * and the length of relatively short strings,
   4119   * followed by the buffer for short string contents.
   4120   * There is no padding inside fStackFields.
   4121   *
   4122   * Heap-allocated and aliased strings use fUnion.fFields.
   4123   * Both fStackFields and fFields must begin with the same fields for flags and short length,
   4124   * that is, those must have the same memory offsets inside the object,
   4125   * because the flags must be inspected in order to decide which half of fUnion is being used.
   4126   * We assume that the compiler does not reorder the fields.
   4127   *
   4128   * (Padding at the end of fFields is ok:
   4129   * As long as it is no larger than fStackFields, it is not wasted space.)
   4130   *
   4131   * For some of the history of the UnicodeString class fields layout, see
   4132   * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
   4133   * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
   4134   * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
   4135   */
   4136  // (implicit) *vtable;
   4137  union StackBufferOrFields {
   4138    // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
   4139    // Each struct of the union must begin with fLengthAndFlags.
   4140    struct {
   4141      int16_t fLengthAndFlags;          // bit fields: see constants above
   4142      char16_t fBuffer[US_STACKBUF_SIZE];  // buffer for short strings
   4143    } fStackFields;
   4144    struct {
   4145      int16_t fLengthAndFlags;          // bit fields: see constants above
   4146      int32_t fLength;    // number of characters in fArray, read by length() when fLengthAndFlags<0 (kLengthIsLarge); else undefined
   4147      int32_t fCapacity;  // capacity of fArray (in char16_ts)
   4148      // array pointer last to minimize padding for machines with P128 data model
   4149      // or pointer sizes that are not a power of 2
   4150      char16_t   *fArray;    // the Unicode data
   4151    } fFields;
   4152  } fUnion;
   4153 };
   4154 
   4155 /**
   4156 * Creates a new UnicodeString from the concatenation of two others.
   4157 *
   4158 * @param s1 The first string to be copied to the new one.
   4159 * @param s2 The second string to be copied to the new one, after s1.
   4160 * @return UnicodeString(s1).append(s2)
   4161 * @stable ICU 2.8
   4162 */
   4163 U_COMMON_API UnicodeString U_EXPORT2
   4164 operator+ (const UnicodeString &s1, const UnicodeString &s2);
   4165 
   4166 /**
   4167 * Concatenates a UnicodeString and `s2` into a new UnicodeString, where `s2` is, or is
   4168 * implicitly convertible to, a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
   4169 *
   4170 * @param s1 The string to be copied to the new one.
   4171 * @param s2 The string view to be copied to the new string, after s1.
   4172 * @return UnicodeString(s1).append(s2)
   4173 * @stable ICU 76
   4174 */
   4175 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
   4176 inline UnicodeString operator+(const UnicodeString &s1, const S &s2) {
   4177  const auto view = internal::toU16StringView(s2);  // right-hand side as a UTF-16 view
   4178  return unistr_internalConcat(s1, view);
   4179 }
   4180 
   4181 #ifndef U_FORCE_HIDE_INTERNAL_API
   4182 /** @internal */
   4183 U_COMMON_API UnicodeString U_EXPORT2
   4184 unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2);
   4185 #endif
   4186 
   4187 //========================================
   4188 // Inline members
   4189 //========================================
   4190 
   4191 //========================================
   4192 // Privates
   4193 //========================================
   4194 
   4195 // Pins `start` to the range of valid offsets, 0..length().
   4196 inline void UnicodeString::pinIndex(int32_t& start) const {
   4197  if(start < 0) {
   4198    start = 0;
   4199    return;
   4200  }
   4201  // Non-negative: only the upper bound is left to check.
   4202  const int32_t limit = length();
   4203  if(start > limit) { start = limit; }
   4204 }
   4205 
   4206 // Pins `start` to 0..length() and then `_length` to 0..(length() - start).
   4207 inline void UnicodeString::pinIndices(int32_t& start, int32_t& _length) const {
   4208  const int32_t total = length();
   4209  // Pin the start offset first; the count is measured from the pinned value.
   4210  if(start < 0) {
   4211    start = 0;
   4212  } else if(total < start) {
   4213    start = total;
   4214  }
   4215  // Whatever lies between the pinned start and the end is the most that can be covered.
   4216  const int32_t remaining = total - start;
   4217  if(_length < 0) {
   4218    _length = 0;
   4219  } else if(remaining < _length) {
   4220    _length = remaining;
   4221  }
   4222 }
   4223 
   4224 // Start of the character storage: the in-object buffer if kUsingStackBuffer is set, else fArray.
   4225 inline char16_t* UnicodeString::getArrayStart() {
   4226  if(fUnion.fFields.fLengthAndFlags & kUsingStackBuffer) { return fUnion.fStackFields.fBuffer; }
   4227  return fUnion.fFields.fArray;
   4228 }
   4229 
   4230 // Const counterpart: same selection between the in-object buffer and fArray, read-only access.
   4231 inline const char16_t* UnicodeString::getArrayStart() const {
   4232  if(fUnion.fFields.fLengthAndFlags & kUsingStackBuffer) { return fUnion.fStackFields.fBuffer; }
   4233  return fUnion.fFields.fArray;
   4234 }
   4235 
   4236 //========================================
   4237 // Default constructor
   4238 //========================================
   4239 
   4240 // Creates an empty string: only the stack-buffer flag is set, so the short length bits are 0.
   4241 inline UnicodeString::UnicodeString() {
   4242  fUnion.fStackFields.fLengthAndFlags = kShortString;
   4243 }
   4244 
   4245 inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) {
   4246  fUnion.fStackFields.fLengthAndFlags=kShortString;  // argument unused: same initialization as the default constructor
   4247 }
   4248 
   4249 inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) {
   4250  fUnion.fStackFields.fLengthAndFlags=kShortString;  // both arguments unused: same initialization as the default constructor
   4251 }
   4252 
   4253 inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) {
   4254  fUnion.fStackFields.fLengthAndFlags=kShortString;  // all arguments unused: same initialization as the default constructor
   4255 }
   4256 
   4257 //========================================
   4258 // Read-only implementation methods
   4259 //========================================
   4260 inline UBool
   4261 UnicodeString::hasShortLength() const {
   4262  return fUnion.fFields.fLengthAndFlags>=0;
   4263 }
   4264 
   4265 inline int32_t
   4266 UnicodeString::getShortLength() const {
   4267  // fLengthAndFlags must be non-negative -> short length >= 0
   4268  // and arithmetic or logical shift does not matter.
   4269  return fUnion.fFields.fLengthAndFlags>>kLengthShift;
   4270 }
   4271 
   4272 inline int32_t
   4273 UnicodeString::length() const {
   4274  return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
   4275 }
   4276 
   4277 inline int32_t
   4278 UnicodeString::getCapacity() const {
   4279  return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
   4280    US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
   4281 }
   4282 
   4283 inline int32_t
   4284 UnicodeString::hashCode() const
   4285 { return doHashCode(); }
   4286 
   4287 inline UBool
   4288 UnicodeString::isBogus() const
   4289 { return fUnion.fFields.fLengthAndFlags & kIsBogus; }
   4290 
   4291 inline UBool
   4292 UnicodeString::isWritable() const
   4293 { return !(fUnion.fFields.fLengthAndFlags & (kOpenGetBuffer | kIsBogus)); }
   4294 
   4295 inline UBool
   4296 UnicodeString::isBufferWritable() const
   4297 {
   4298  return
   4299      !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
   4300      (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1);
   4301 }
   4302 
   4303 inline const char16_t *
   4304 UnicodeString::getBuffer() const {
   4305  if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) {
   4306    return nullptr;
   4307  } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
   4308    return fUnion.fStackFields.fBuffer;
   4309  } else {
   4310    return fUnion.fFields.fArray;
   4311  }
   4312 }
   4313 
   4314 //========================================
   4315 // Read-only alias methods
   4316 //========================================
   4317 inline int8_t
   4318 UnicodeString::doCompare(int32_t start,
   4319              int32_t thisLength,
   4320              const UnicodeString& srcText,
   4321              int32_t srcStart,
   4322              int32_t srcLength) const
   4323 {
   4324  if(srcText.isBogus()) {
   4325    return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
   4326  } else {
   4327    srcText.pinIndices(srcStart, srcLength);
   4328    return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
   4329  }
   4330 }
   4331 
   4332 inline UBool
   4333 UnicodeString::doEqualsSubstring(int32_t start,
   4334              int32_t thisLength,
   4335              const UnicodeString& srcText,
   4336              int32_t srcStart,
   4337              int32_t srcLength) const
   4338 {
   4339  if(srcText.isBogus()) {
   4340    return isBogus();
   4341  } else {
   4342    srcText.pinIndices(srcStart, srcLength);
   4343    return !isBogus() && doEqualsSubstring(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
   4344  }
   4345 }
   4346 
   4347 inline bool
   4348 UnicodeString::operator== (const UnicodeString& text) const
   4349 {
   4350  if(isBogus()) {
   4351    return text.isBogus();
   4352  } else {
   4353    int32_t len = length(), textLength = text.length();
   4354    return !text.isBogus() && len == textLength && doEquals(text, len);
   4355  }
   4356 }
   4357 
   4358 inline bool
   4359 UnicodeString::operator!= (const UnicodeString& text) const
   4360 { return (! operator==(text)); }
   4361 
   4362 inline UBool
   4363 UnicodeString::operator> (const UnicodeString& text) const
   4364 { return doCompare(0, length(), text, 0, text.length()) == 1; }
   4365 
   4366 inline UBool
   4367 UnicodeString::operator< (const UnicodeString& text) const
   4368 { return doCompare(0, length(), text, 0, text.length()) == -1; }
   4369 
   4370 inline UBool
   4371 UnicodeString::operator>= (const UnicodeString& text) const
   4372 { return doCompare(0, length(), text, 0, text.length()) != -1; }
   4373 
   4374 inline UBool
   4375 UnicodeString::operator<= (const UnicodeString& text) const
   4376 { return doCompare(0, length(), text, 0, text.length()) != 1; }
   4377 
   4378 inline int8_t
   4379 UnicodeString::compare(const UnicodeString& text) const
   4380 { return doCompare(0, length(), text, 0, text.length()); }
   4381 
   4382 inline int8_t
   4383 UnicodeString::compare(int32_t start,
   4384               int32_t _length,
   4385               const UnicodeString& srcText) const
   4386 { return doCompare(start, _length, srcText, 0, srcText.length()); }
   4387 
   4388 inline int8_t
   4389 UnicodeString::compare(ConstChar16Ptr srcChars,
   4390               int32_t srcLength) const
   4391 { return doCompare(0, length(), srcChars, 0, srcLength); }
   4392 
   4393 inline int8_t
   4394 UnicodeString::compare(int32_t start,
   4395               int32_t _length,
   4396               const UnicodeString& srcText,
   4397               int32_t srcStart,
   4398               int32_t srcLength) const
   4399 { return doCompare(start, _length, srcText, srcStart, srcLength); }
   4400 
   4401 inline int8_t
   4402 UnicodeString::compare(int32_t start,
   4403               int32_t _length,
   4404               const char16_t *srcChars) const
   4405 { return doCompare(start, _length, srcChars, 0, _length); }
   4406 
   4407 inline int8_t
   4408 UnicodeString::compare(int32_t start,
   4409               int32_t _length,
   4410               const char16_t *srcChars,
   4411               int32_t srcStart,
   4412               int32_t srcLength) const
   4413 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
   4414 
   4415 inline int8_t
   4416 UnicodeString::compareBetween(int32_t start,
   4417                  int32_t limit,
   4418                  const UnicodeString& srcText,
   4419                  int32_t srcStart,
   4420                  int32_t srcLimit) const
   4421 { return doCompare(start, limit - start,
   4422           srcText, srcStart, srcLimit - srcStart); }
   4423 
   4424 inline int8_t
   4425 UnicodeString::doCompareCodePointOrder(int32_t start,
   4426                                       int32_t thisLength,
   4427                                       const UnicodeString& srcText,
   4428                                       int32_t srcStart,
   4429                                       int32_t srcLength) const
   4430 {
   4431  if(srcText.isBogus()) {
   4432    return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
   4433  } else {
   4434    srcText.pinIndices(srcStart, srcLength);
   4435    return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
   4436  }
   4437 }
   4438 
   4439 inline int8_t
   4440 UnicodeString::compareCodePointOrder(const UnicodeString& text) const
   4441 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
   4442 
   4443 inline int8_t
   4444 UnicodeString::compareCodePointOrder(int32_t start,
   4445                                     int32_t _length,
   4446                                     const UnicodeString& srcText) const
   4447 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
   4448 
   4449 inline int8_t
   4450 UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars,
   4451                                     int32_t srcLength) const
   4452 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
   4453 
   4454 inline int8_t
   4455 UnicodeString::compareCodePointOrder(int32_t start,
   4456                                     int32_t _length,
   4457                                     const UnicodeString& srcText,
   4458                                     int32_t srcStart,
   4459                                     int32_t srcLength) const
   4460 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
   4461 
   4462 inline int8_t
   4463 UnicodeString::compareCodePointOrder(int32_t start,
   4464                                     int32_t _length,
   4465                                     const char16_t *srcChars) const
   4466 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
   4467 
   4468 inline int8_t
   4469 UnicodeString::compareCodePointOrder(int32_t start,
   4470                                     int32_t _length,
   4471                                     const char16_t *srcChars,
   4472                                     int32_t srcStart,
   4473                                     int32_t srcLength) const
   4474 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
   4475 
   4476 inline int8_t
   4477 UnicodeString::compareCodePointOrderBetween(int32_t start,
   4478                                            int32_t limit,
   4479                                            const UnicodeString& srcText,
   4480                                            int32_t srcStart,
   4481                                            int32_t srcLimit) const
   4482 { return doCompareCodePointOrder(start, limit - start,
   4483           srcText, srcStart, srcLimit - srcStart); }
   4484 
   4485 inline int8_t
   4486 UnicodeString::doCaseCompare(int32_t start,
   4487                             int32_t thisLength,
   4488                             const UnicodeString &srcText,
   4489                             int32_t srcStart,
   4490                             int32_t srcLength,
   4491                             uint32_t options) const
   4492 {
   4493  if(srcText.isBogus()) {
   4494    return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
   4495  } else {
   4496    srcText.pinIndices(srcStart, srcLength);
   4497    return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
   4498  }
   4499 }
   4500 
   4501 inline int8_t
   4502 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
   4503  return doCaseCompare(0, length(), text, 0, text.length(), options);
   4504 }
   4505 
   4506 inline int8_t
   4507 UnicodeString::caseCompare(int32_t start,
   4508                           int32_t _length,
   4509                           const UnicodeString &srcText,
   4510                           uint32_t options) const {
   4511  return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
   4512 }
   4513 
   4514 inline int8_t
   4515 UnicodeString::caseCompare(ConstChar16Ptr srcChars,
   4516                           int32_t srcLength,
   4517                           uint32_t options) const {
   4518  return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
   4519 }
   4520 
   4521 inline int8_t
   4522 UnicodeString::caseCompare(int32_t start,
   4523                           int32_t _length,
   4524                           const UnicodeString &srcText,
   4525                           int32_t srcStart,
   4526                           int32_t srcLength,
   4527                           uint32_t options) const {
   4528  return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
   4529 }
   4530 
   4531 inline int8_t
   4532 UnicodeString::caseCompare(int32_t start,
   4533                           int32_t _length,
   4534                           const char16_t *srcChars,
   4535                           uint32_t options) const {
   4536  return doCaseCompare(start, _length, srcChars, 0, _length, options);
   4537 }
   4538 
   4539 inline int8_t
   4540 UnicodeString::caseCompare(int32_t start,
   4541                           int32_t _length,
   4542                           const char16_t *srcChars,
   4543                           int32_t srcStart,
   4544                           int32_t srcLength,
   4545                           uint32_t options) const {
   4546  return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
   4547 }
   4548 
   4549 inline int8_t
   4550 UnicodeString::caseCompareBetween(int32_t start,
   4551                                  int32_t limit,
   4552                                  const UnicodeString &srcText,
   4553                                  int32_t srcStart,
   4554                                  int32_t srcLimit,
   4555                                  uint32_t options) const {
   4556  return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
   4557 }
   4558 
   4559 inline int32_t
   4560 UnicodeString::indexOf(const UnicodeString& srcText,
   4561               int32_t srcStart,
   4562               int32_t srcLength,
   4563               int32_t start,
   4564               int32_t _length) const
   4565 {
   4566  if(!srcText.isBogus()) {
   4567    srcText.pinIndices(srcStart, srcLength);
   4568    if(srcLength > 0) {
   4569      return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
   4570    }
   4571  }
   4572  return -1;
   4573 }
   4574 
   4575 inline int32_t
   4576 UnicodeString::indexOf(const UnicodeString& text) const
   4577 { return indexOf(text, 0, text.length(), 0, length()); }
   4578 
   4579 inline int32_t
   4580 UnicodeString::indexOf(const UnicodeString& text,
   4581               int32_t start) const {
   4582  pinIndex(start);
   4583  return indexOf(text, 0, text.length(), start, length() - start);
   4584 }
   4585 
   4586 inline int32_t
   4587 UnicodeString::indexOf(const UnicodeString& text,
   4588               int32_t start,
   4589               int32_t _length) const
   4590 { return indexOf(text, 0, text.length(), start, _length); }
   4591 
   4592 inline int32_t
   4593 UnicodeString::indexOf(const char16_t *srcChars,
   4594               int32_t srcLength,
   4595               int32_t start) const {
   4596  pinIndex(start);
   4597  return indexOf(srcChars, 0, srcLength, start, length() - start);
   4598 }
   4599 
   4600 inline int32_t
   4601 UnicodeString::indexOf(ConstChar16Ptr srcChars,
   4602               int32_t srcLength,
   4603               int32_t start,
   4604               int32_t _length) const
   4605 { return indexOf(srcChars, 0, srcLength, start, _length); }
   4606 
   4607 inline int32_t
   4608 UnicodeString::indexOf(char16_t c,
   4609               int32_t start,
   4610               int32_t _length) const
   4611 { return doIndexOf(c, start, _length); }
   4612 
   4613 inline int32_t
   4614 UnicodeString::indexOf(UChar32 c,
   4615               int32_t start,
   4616               int32_t _length) const
   4617 { return doIndexOf(c, start, _length); }
   4618 
   4619 inline int32_t
   4620 UnicodeString::indexOf(char16_t c) const
   4621 { return doIndexOf(c, 0, length()); }
   4622 
   4623 inline int32_t
   4624 UnicodeString::indexOf(UChar32 c) const
   4625 { return indexOf(c, 0, length()); }
   4626 
   4627 inline int32_t
   4628 UnicodeString::indexOf(char16_t c,
   4629               int32_t start) const {
   4630  pinIndex(start);
   4631  return doIndexOf(c, start, length() - start);
   4632 }
   4633 
   4634 inline int32_t
   4635 UnicodeString::indexOf(UChar32 c,
   4636               int32_t start) const {
   4637  pinIndex(start);
   4638  return indexOf(c, start, length() - start);
   4639 }
   4640 
   4641 inline int32_t
   4642 UnicodeString::lastIndexOf(ConstChar16Ptr srcChars,
   4643               int32_t srcLength,
   4644               int32_t start,
   4645               int32_t _length) const
   4646 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
   4647 
   4648 inline int32_t
   4649 UnicodeString::lastIndexOf(const char16_t *srcChars,
   4650               int32_t srcLength,
   4651               int32_t start) const {
   4652  pinIndex(start);
   4653  return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
   4654 }
   4655 
   4656 inline int32_t
   4657 UnicodeString::lastIndexOf(const UnicodeString& srcText,
   4658               int32_t srcStart,
   4659               int32_t srcLength,
   4660               int32_t start,
   4661               int32_t _length) const
   4662 {
   4663  if(!srcText.isBogus()) {
   4664    srcText.pinIndices(srcStart, srcLength);
   4665    if(srcLength > 0) {
   4666      return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
   4667    }
   4668  }
   4669  return -1;
   4670 }
   4671 
   4672 inline int32_t
   4673 UnicodeString::lastIndexOf(const UnicodeString& text,
   4674               int32_t start,
   4675               int32_t _length) const
   4676 { return lastIndexOf(text, 0, text.length(), start, _length); }
   4677 
   4678 inline int32_t
   4679 UnicodeString::lastIndexOf(const UnicodeString& text,
   4680               int32_t start) const {
   4681  pinIndex(start);
   4682  return lastIndexOf(text, 0, text.length(), start, length() - start);
   4683 }
   4684 
   4685 inline int32_t
   4686 UnicodeString::lastIndexOf(const UnicodeString& text) const
   4687 { return lastIndexOf(text, 0, text.length(), 0, length()); }
   4688 
   4689 inline int32_t
   4690 UnicodeString::lastIndexOf(char16_t c,
   4691               int32_t start,
   4692               int32_t _length) const
   4693 { return doLastIndexOf(c, start, _length); }
   4694 
   4695 inline int32_t
   4696 UnicodeString::lastIndexOf(UChar32 c,
   4697               int32_t start,
   4698               int32_t _length) const {
   4699  return doLastIndexOf(c, start, _length);
   4700 }
   4701 
   4702 inline int32_t
   4703 UnicodeString::lastIndexOf(char16_t c) const
   4704 { return doLastIndexOf(c, 0, length()); }
   4705 
   4706 inline int32_t
   4707 UnicodeString::lastIndexOf(UChar32 c) const {
   4708  return lastIndexOf(c, 0, length());
   4709 }
   4710 
   4711 inline int32_t
   4712 UnicodeString::lastIndexOf(char16_t c,
   4713               int32_t start) const {
   4714  pinIndex(start);
   4715  return doLastIndexOf(c, start, length() - start);
   4716 }
   4717 
   4718 inline int32_t
   4719 UnicodeString::lastIndexOf(UChar32 c,
   4720               int32_t start) const {
   4721  pinIndex(start);
   4722  return lastIndexOf(c, start, length() - start);
   4723 }
   4724 
   4725 inline UBool
   4726 UnicodeString::startsWith(const UnicodeString& text) const
   4727 { return doEqualsSubstring(0, text.length(), text, 0, text.length()); }
   4728 
   4729 inline UBool
   4730 UnicodeString::startsWith(const UnicodeString& srcText,
   4731              int32_t srcStart,
   4732              int32_t srcLength) const
   4733 { return doEqualsSubstring(0, srcLength, srcText, srcStart, srcLength); }
   4734 
   4735 inline UBool
   4736 UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const {
   4737  if(srcLength < 0) {
   4738    srcLength = u_strlen(toUCharPtr(srcChars));
   4739  }
   4740  return doEqualsSubstring(0, srcLength, srcChars, 0, srcLength);
   4741 }
   4742 
   4743 inline UBool
   4744 UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const {
   4745  if(srcLength < 0) {
   4746    srcLength = u_strlen(toUCharPtr(srcChars));
   4747  }
   4748  return doEqualsSubstring(0, srcLength, srcChars, srcStart, srcLength);
   4749 }
   4750 
   4751 inline UBool
   4752 UnicodeString::endsWith(const UnicodeString& text) const
   4753 { return doEqualsSubstring(length() - text.length(), text.length(),
   4754           text, 0, text.length()); }
   4755 
   4756 inline UBool
   4757 UnicodeString::endsWith(const UnicodeString& srcText,
   4758            int32_t srcStart,
   4759            int32_t srcLength) const {
   4760  srcText.pinIndices(srcStart, srcLength);
   4761  return doEqualsSubstring(length() - srcLength, srcLength,
   4762                   srcText, srcStart, srcLength);
   4763 }
   4764 
   4765 inline UBool
   4766 UnicodeString::endsWith(ConstChar16Ptr srcChars,
   4767            int32_t srcLength) const {
   4768  if(srcLength < 0) {
   4769    srcLength = u_strlen(toUCharPtr(srcChars));
   4770  }
   4771  return doEqualsSubstring(length() - srcLength, srcLength, srcChars, 0, srcLength);
   4772 }
   4773 
   4774 inline UBool
   4775 UnicodeString::endsWith(const char16_t *srcChars,
   4776            int32_t srcStart,
   4777            int32_t srcLength) const {
   4778  if(srcLength < 0) {
   4779    srcLength = u_strlen(toUCharPtr(srcChars + srcStart));
   4780  }
   4781  return doEqualsSubstring(length() - srcLength, srcLength,
   4782                   srcChars, srcStart, srcLength);
   4783 }
   4784 
   4785 //========================================
   4786 // replace
   4787 //========================================
   4788 inline UnicodeString&
   4789 UnicodeString::replace(int32_t start,
   4790               int32_t _length,
   4791               const UnicodeString& srcText)
   4792 { return doReplace(start, _length, srcText, 0, srcText.length()); }
   4793 
   4794 inline UnicodeString&
   4795 UnicodeString::replace(int32_t start,
   4796               int32_t _length,
   4797               const UnicodeString& srcText,
   4798               int32_t srcStart,
   4799               int32_t srcLength)
   4800 { return doReplace(start, _length, srcText, srcStart, srcLength); }
   4801 
   4802 inline UnicodeString&
   4803 UnicodeString::replace(int32_t start,
   4804               int32_t _length,
   4805               ConstChar16Ptr srcChars,
   4806               int32_t srcLength)
   4807 { return doReplace(start, _length, srcChars, 0, srcLength); }
   4808 
   4809 inline UnicodeString&
   4810 UnicodeString::replace(int32_t start,
   4811               int32_t _length,
   4812               const char16_t *srcChars,
   4813               int32_t srcStart,
   4814               int32_t srcLength)
   4815 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
   4816 
   4817 inline UnicodeString&
   4818 UnicodeString::replace(int32_t start,
   4819               int32_t _length,
   4820               char16_t srcChar)
   4821 { return doReplace(start, _length, &srcChar, 0, 1); }
   4822 
   4823 inline UnicodeString&
   4824 UnicodeString::replaceBetween(int32_t start,
   4825                  int32_t limit,
   4826                  const UnicodeString& srcText)
   4827 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
   4828 
   4829 inline UnicodeString&
   4830 UnicodeString::replaceBetween(int32_t start,
   4831                  int32_t limit,
   4832                  const UnicodeString& srcText,
   4833                  int32_t srcStart,
   4834                  int32_t srcLimit)
   4835 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
   4836 
   4837 inline UnicodeString&
   4838 UnicodeString::findAndReplace(const UnicodeString& oldText,
   4839                  const UnicodeString& newText)
   4840 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
   4841            newText, 0, newText.length()); }
   4842 
   4843 inline UnicodeString&
   4844 UnicodeString::findAndReplace(int32_t start,
   4845                  int32_t _length,
   4846                  const UnicodeString& oldText,
   4847                  const UnicodeString& newText)
   4848 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
   4849            newText, 0, newText.length()); }
   4850 
   4851 // ============================
   4852 // extract
   4853 // ============================
   4854 inline void
   4855 UnicodeString::doExtract(int32_t start,
   4856             int32_t _length,
   4857             UnicodeString& target) const
   4858 { target.replace(0, target.length(), *this, start, _length); }
   4859 
   4860 inline void
   4861 UnicodeString::extract(int32_t start,
   4862               int32_t _length,
   4863               Char16Ptr target,
   4864               int32_t targetStart) const
   4865 { doExtract(start, _length, target, targetStart); }
   4866 
   4867 inline void
   4868 UnicodeString::extract(int32_t start,
   4869               int32_t _length,
   4870               UnicodeString& target) const
   4871 { doExtract(start, _length, target); }
   4872 
   4873 #if !UCONFIG_NO_CONVERSION
   4874 
   4875 inline int32_t
   4876 UnicodeString::extract(int32_t start,
   4877               int32_t _length,
   4878               char *dst,
   4879               const char *codepage) const
   4880 
   4881 {
   4882  // This dstSize value will be checked explicitly
   4883  return extract(start, _length, dst, dst != nullptr ? 0xffffffff : 0, codepage);
   4884 }
   4885 
   4886 #endif
   4887 
   4888 inline void
   4889 UnicodeString::extractBetween(int32_t start,
   4890                  int32_t limit,
   4891                  char16_t *dst,
   4892                  int32_t dstStart) const {
   4893  pinIndex(start);
   4894  pinIndex(limit);
   4895  doExtract(start, limit - start, dst, dstStart);
   4896 }
   4897 
   4898 inline UnicodeString
   4899 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
   4900    return tempSubString(start, limit - start);
   4901 }
   4902 
   4903 inline char16_t
   4904 UnicodeString::doCharAt(int32_t offset) const
   4905 {
   4906  if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(length())) {
   4907    return getArrayStart()[offset];
   4908  } else {
   4909    return kInvalidUChar;
   4910  }
   4911 }
   4912 
   4913 inline char16_t
   4914 UnicodeString::charAt(int32_t offset) const
   4915 { return doCharAt(offset); }
   4916 
   4917 inline char16_t
   4918 UnicodeString::operator[] (int32_t offset) const
   4919 { return doCharAt(offset); }
   4920 
   4921 inline UBool
   4922 UnicodeString::isEmpty() const {
   4923  // Arithmetic or logical right shift does not matter: only testing for 0.
   4924  return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
   4925 }
   4926 
   4927 //========================================
   4928 // Write implementation methods
   4929 //========================================
   4930 inline void
   4931 UnicodeString::setZeroLength() {
   4932  fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
   4933 }
   4934 
   4935 inline void
   4936 UnicodeString::setShortLength(int32_t len) {
   4937  // requires 0 <= len <= kMaxShortLength
   4938  fUnion.fFields.fLengthAndFlags =
   4939    static_cast<int16_t>((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
   4940 }
   4941 
   4942 inline void
   4943 UnicodeString::setLength(int32_t len) {
   4944  if(len <= kMaxShortLength) {
   4945    setShortLength(len);
   4946  } else {
   4947    fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
   4948    fUnion.fFields.fLength = len;
   4949  }
   4950 }
   4951 
   4952 inline void
   4953 UnicodeString::setToEmpty() {
   4954  fUnion.fFields.fLengthAndFlags = kShortString;
   4955 }
   4956 
   4957 inline void
   4958 UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
   4959  setLength(len);
   4960  fUnion.fFields.fArray = array;
   4961  fUnion.fFields.fCapacity = capacity;
   4962 }
   4963 
   4964 inline UnicodeString&
   4965 UnicodeString::operator= (char16_t ch)
   4966 { return doReplace(0, length(), &ch, 0, 1); }
   4967 
   4968 inline UnicodeString&
   4969 UnicodeString::operator= (UChar32 ch)
   4970 { return replace(0, length(), ch); }
   4971 
   4972 inline UnicodeString&
   4973 UnicodeString::setTo(const UnicodeString& srcText,
   4974             int32_t srcStart,
   4975             int32_t srcLength)
   4976 {
   4977  unBogus();
   4978  return doReplace(0, length(), srcText, srcStart, srcLength);
   4979 }
   4980 
   4981 inline UnicodeString&
   4982 UnicodeString::setTo(const UnicodeString& srcText,
   4983             int32_t srcStart)
   4984 {
   4985  unBogus();
   4986  srcText.pinIndex(srcStart);
   4987  return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
   4988 }
   4989 
   4990 inline UnicodeString&
   4991 UnicodeString::setTo(const UnicodeString& srcText)
   4992 {
   4993  return copyFrom(srcText);
   4994 }
   4995 
   4996 inline UnicodeString&
   4997 UnicodeString::setTo(const char16_t *srcChars,
   4998             int32_t srcLength)
   4999 {
   5000  unBogus();
   5001  return doReplace(0, length(), srcChars, 0, srcLength);
   5002 }
   5003 
   5004 inline UnicodeString&
   5005 UnicodeString::setTo(char16_t srcChar)
   5006 {
   5007  unBogus();
   5008  return doReplace(0, length(), &srcChar, 0, 1);
   5009 }
   5010 
   5011 inline UnicodeString&
   5012 UnicodeString::setTo(UChar32 srcChar)
   5013 {
   5014  unBogus();
   5015  return replace(0, length(), srcChar);
   5016 }
   5017 
   5018 inline UnicodeString&
   5019 UnicodeString::append(const UnicodeString& srcText,
   5020              int32_t srcStart,
   5021              int32_t srcLength)
   5022 { return doAppend(srcText, srcStart, srcLength); }
   5023 
   5024 inline UnicodeString&
   5025 UnicodeString::append(const UnicodeString& srcText)
   5026 { return doAppend(srcText, 0, srcText.length()); }
   5027 
   5028 inline UnicodeString&
   5029 UnicodeString::append(const char16_t *srcChars,
   5030              int32_t srcStart,
   5031              int32_t srcLength)
   5032 { return doAppend(srcChars, srcStart, srcLength); }
   5033 
   5034 inline UnicodeString&
   5035 UnicodeString::append(ConstChar16Ptr srcChars,
   5036              int32_t srcLength)
   5037 { return doAppend(srcChars, 0, srcLength); }
   5038 
   5039 inline UnicodeString&
   5040 UnicodeString::append(char16_t srcChar)
   5041 { return doAppend(&srcChar, 0, 1); }
   5042 
   5043 inline UnicodeString&
   5044 UnicodeString::operator+= (char16_t ch)
   5045 { return doAppend(&ch, 0, 1); }
   5046 
   5047 inline UnicodeString&
   5048 UnicodeString::operator+= (UChar32 ch) {
   5049  return append(ch);
   5050 }
   5051 
   5052 inline UnicodeString&
   5053 UnicodeString::operator+= (const UnicodeString& srcText)
   5054 { return doAppend(srcText, 0, srcText.length()); }
   5055 
   5056 inline UnicodeString&
   5057 UnicodeString::insert(int32_t start,
   5058              const UnicodeString& srcText,
   5059              int32_t srcStart,
   5060              int32_t srcLength)
   5061 { return doReplace(start, 0, srcText, srcStart, srcLength); }
   5062 
   5063 inline UnicodeString&
   5064 UnicodeString::insert(int32_t start,
   5065              const UnicodeString& srcText)
   5066 { return doReplace(start, 0, srcText, 0, srcText.length()); }
   5067 
   5068 inline UnicodeString&
   5069 UnicodeString::insert(int32_t start,
   5070              const char16_t *srcChars,
   5071              int32_t srcStart,
   5072              int32_t srcLength)
   5073 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
   5074 
   5075 inline UnicodeString&
   5076 UnicodeString::insert(int32_t start,
   5077              ConstChar16Ptr srcChars,
   5078              int32_t srcLength)
   5079 { return doReplace(start, 0, srcChars, 0, srcLength); }
   5080 
   5081 inline UnicodeString&
   5082 UnicodeString::insert(int32_t start,
   5083              char16_t srcChar)
   5084 { return doReplace(start, 0, &srcChar, 0, 1); }
   5085 
   5086 inline UnicodeString&
   5087 UnicodeString::insert(int32_t start,
   5088              UChar32 srcChar)
   5089 { return replace(start, 0, srcChar); }
   5090 
   5091 
   5092 inline UnicodeString&
   5093 UnicodeString::remove()
   5094 {
   5095  // remove() of a bogus string makes the string empty and non-bogus
   5096  if(isBogus()) {
   5097    setToEmpty();
   5098  } else {
   5099    setZeroLength();
   5100  }
   5101  return *this;
   5102 }
   5103 
   5104 inline UnicodeString&
   5105 UnicodeString::remove(int32_t start,
   5106             int32_t _length)
   5107 {
   5108    if(start <= 0 && _length == INT32_MAX) {
   5109        // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
   5110        return remove();
   5111    }
   5112    return doReplace(start, _length, nullptr, 0, 0);
   5113 }
   5114 
   5115 inline UnicodeString&
   5116 UnicodeString::removeBetween(int32_t start,
   5117                int32_t limit)
   5118 { return doReplace(start, limit - start, nullptr, 0, 0); }
   5119 
   5120 inline UnicodeString &
   5121 UnicodeString::retainBetween(int32_t start, int32_t limit) {
   5122  truncate(limit);
   5123  return doReplace(0, start, nullptr, 0, 0);
   5124 }
   5125 
   5126 inline UBool
   5127 UnicodeString::truncate(int32_t targetLength)
   5128 {
   5129  if(isBogus() && targetLength == 0) {
   5130    // truncate(0) of a bogus string makes the string empty and non-bogus
   5131    unBogus();
   5132    return false;
   5133  } else if (static_cast<uint32_t>(targetLength) < static_cast<uint32_t>(length())) {
   5134    setLength(targetLength);
   5135    return true;
   5136  } else {
   5137    return false;
   5138  }
   5139 }
   5140 
   5141 inline UnicodeString&
   5142 UnicodeString::reverse()
   5143 { return doReverse(0, length()); }
   5144 
   5145 inline UnicodeString&
   5146 UnicodeString::reverse(int32_t start,
   5147               int32_t _length)
   5148 { return doReverse(start, _length); }
   5149 
   5150 U_NAMESPACE_END
   5151 
   5152 #endif /* U_SHOW_CPLUSPLUS_API */
   5153 
   5154 #endif
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE