tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

unistr.cpp (62125B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ******************************************************************************
      5 * Copyright (C) 1999-2016, International Business Machines Corporation and
      6 * others. All Rights Reserved.
      7 ******************************************************************************
      8 *
      9 * File unistr.cpp
     10 *
     11 * Modification History:
     12 *
     13 *   Date        Name        Description
     14 *   09/25/98    stephen     Creation.
     15 *   04/20/99    stephen     Overhauled per 4/16 code review.
     16 *   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX
     17 *   11/18/99    aliu        Added handleReplaceBetween() to make inherit from
     18 *                           Replaceable.
     19 *   06/25/01    grhoten     Removed the dependency on iostream
     20 ******************************************************************************
     21 */
     22 
     23 #include <string_view>
     24 
     25 #include "unicode/utypes.h"
     26 #include "unicode/appendable.h"
     27 #include "unicode/putil.h"
     28 #include "cstring.h"
     29 #include "cmemory.h"
     30 #include "unicode/ustring.h"
     31 #include "unicode/unistr.h"
     32 #include "unicode/utf.h"
     33 #include "unicode/utf16.h"
     34 #include "uelement.h"
     35 #include "ustr_imp.h"
     36 #include "umutex.h"
     37 #include "uassert.h"
     38 
     39 #if 0
     40 
     41 #include <iostream>
     42 using namespace std;
     43 
     44 //DEBUGGING
     45 void
     46 print(const UnicodeString& s,
     47      const char *name)
     48 {
     49  char16_t c;
     50  cout << name << ":|";
     51  for(int i = 0; i < s.length(); ++i) {
     52    c = s[i];
     53    if(c>= 0x007E || c < 0x0020)
     54      cout << "[0x" << hex << s[i] << "]";
     55    else
     56      cout << (char) s[i];
     57  }
     58  cout << '|' << endl;
     59 }
     60 
     61 void
     62 print(const char16_t *s,
     63      int32_t len,
     64      const char *name)
     65 {
     66  char16_t c;
     67  cout << name << ":|";
     68  for(int i = 0; i < len; ++i) {
     69    c = s[i];
     70    if(c>= 0x007E || c < 0x0020)
     71      cout << "[0x" << hex << s[i] << "]";
     72    else
     73      cout << (char) s[i];
     74  }
     75  cout << '|' << endl;
     76 }
     77 // END DEBUGGING
     78 #endif
     79 
     80 // Local function definitions for now
     81 
     82 // need to copy areas that may overlap
     83 static
     84 inline void
     85 us_arrayCopy(const char16_t *src, int32_t srcStart,
     86         char16_t *dst, int32_t dstStart, int32_t count)
     87 {
     88  if(count>0) {
     89    uprv_memmove(dst+dstStart, src+srcStart, (size_t)count*sizeof(*src));
     90  }
     91 }
     92 
     93 // u_unescapeAt() callback to get a char16_t from a UnicodeString
     94 U_CDECL_BEGIN
     95 static char16_t U_CALLCONV
     96 UnicodeString_charAt(int32_t offset, void *context) {
     97    return ((icu::UnicodeString*) context)->charAt(offset);
     98 }
     99 U_CDECL_END
    100 
    101 U_NAMESPACE_BEGIN
    102 
    103 /* The Replaceable virtual destructor can't be defined in the header
    104   due to how AIX works with multiple definitions of virtual functions.
    105 */
    106 Replaceable::~Replaceable() {}
    107 
    108 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
    109 
    110 UnicodeString U_EXPORT2
    111 operator+ (const UnicodeString &s1, const UnicodeString &s2) {
    112  int32_t sumLengths;
    113  if (uprv_add32_overflow(s1.length(), s2.length(), &sumLengths)) {
    114    UnicodeString bogus;
    115    bogus.setToBogus();
    116    return bogus;
    117  }
    118  if (sumLengths != INT32_MAX) {
    119    ++sumLengths;  // space for a terminating NUL if we need one
    120  }
    121  return UnicodeString(sumLengths, static_cast<UChar32>(0), 0).append(s1).append(s2);
    122 }
    123 
    124 U_COMMON_API UnicodeString U_EXPORT2
    125 unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2) {
    126  int32_t sumLengths;
    127  if (s2.length() > INT32_MAX ||
    128      uprv_add32_overflow(s1.length(), static_cast<int32_t>(s2.length()), &sumLengths)) {
    129    UnicodeString bogus;
    130    bogus.setToBogus();
    131    return bogus;
    132  }
    133  if (sumLengths != INT32_MAX) {
    134    ++sumLengths;  // space for a terminating NUL if we need one
    135  }
    136  return UnicodeString(sumLengths, static_cast<UChar32>(0), 0).append(s1).append(s2);
    137 }
    138 
    139 
    140 //========================================
    141 // Reference Counting functions, put at top of file so that optimizing compilers
    142 //                               have a chance to automatically inline.
    143 //========================================
    144 
    145 void
    146 UnicodeString::addRef() {
    147  umtx_atomic_inc(reinterpret_cast<u_atomic_int32_t*>(fUnion.fFields.fArray) - 1);
    148 }
    149 
    150 int32_t
    151 UnicodeString::removeRef() {
    152  return umtx_atomic_dec(reinterpret_cast<u_atomic_int32_t*>(fUnion.fFields.fArray) - 1);
    153 }
    154 
    155 int32_t
    156 UnicodeString::refCount() const {
    157  return umtx_loadAcquire(*(reinterpret_cast<u_atomic_int32_t*>(fUnion.fFields.fArray) - 1));
    158 }
    159 
    160 void
    161 UnicodeString::releaseArray() {
    162  if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == 0) {
    163    uprv_free(reinterpret_cast<int32_t*>(fUnion.fFields.fArray) - 1);
    164  }
    165 }
    166 
    167 
    168 
    169 //========================================
    170 // Constructors
    171 //========================================
    172 
    173 // The default constructor is inline in unistr.h.
    174 
    175 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) {
    176  fUnion.fFields.fLengthAndFlags = 0;
    177  if (count <= 0 || static_cast<uint32_t>(c) > 0x10ffff) {
    178    // just allocate and do not do anything else
    179    allocate(capacity);
    180  } else if(c <= 0xffff) {
    181    int32_t length = count;
    182    if(capacity < length) {
    183      capacity = length;
    184    }
    185    if(allocate(capacity)) {
    186      char16_t *array = getArrayStart();
    187      char16_t unit = static_cast<char16_t>(c);
    188      for(int32_t i = 0; i < length; ++i) {
    189        array[i] = unit;
    190      }
    191      setLength(length);
    192    }
    193  } else {  // supplementary code point, write surrogate pairs
    194    if(count > (INT32_MAX / 2)) {
    195      // We would get more than 2G UChars.
    196      allocate(capacity);
    197      return;
    198    }
    199    int32_t length = count * 2;
    200    if(capacity < length) {
    201      capacity = length;
    202    }
    203    if(allocate(capacity)) {
    204      char16_t *array = getArrayStart();
    205      char16_t lead = U16_LEAD(c);
    206      char16_t trail = U16_TRAIL(c);
    207      for(int32_t i = 0; i < length; i += 2) {
    208        array[i] = lead;
    209        array[i + 1] = trail;
    210      }
    211      setLength(length);
    212    }
    213  }
    214 }
    215 
    216 UnicodeString::UnicodeString(char16_t ch) {
    217  fUnion.fFields.fLengthAndFlags = kLength1 | kShortString;
    218  fUnion.fStackFields.fBuffer[0] = ch;
    219 }
    220 
    221 UnicodeString::UnicodeString(UChar32 ch) {
    222  fUnion.fFields.fLengthAndFlags = kShortString;
    223  int32_t i = 0;
    224  UBool isError = false;
    225  U16_APPEND(fUnion.fStackFields.fBuffer, i, US_STACKBUF_SIZE, ch, isError);
    226  // We test isError so that the compiler does not complain that we don't.
    227  // If isError then i==0 which is what we want anyway.
    228  if(!isError) {
    229    setShortLength(i);
    230  }
    231 }
    232 
    233 UnicodeString::UnicodeString(const char16_t *text,
    234                             int32_t textLength) {
    235  fUnion.fFields.fLengthAndFlags = kShortString;
    236  doAppend(text, 0, textLength);
    237 }
    238 
    239 UnicodeString::UnicodeString(UBool isTerminated,
    240                             ConstChar16Ptr textPtr,
    241                             int32_t textLength) {
    242  fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
    243  const char16_t *text = textPtr;
    244  if(text == nullptr) {
    245    // treat as an empty string, do not alias
    246    setToEmpty();
    247  } else if(textLength < -1 ||
    248            (textLength == -1 && !isTerminated) ||
    249            (textLength >= 0 && isTerminated && text[textLength] != 0)
    250  ) {
    251    setToBogus();
    252  } else {
    253    if(textLength == -1) {
    254      // text is terminated, or else it would have failed the above test
    255      textLength = u_strlen(text);
    256    }
    257    setArray(const_cast<char16_t *>(text), textLength,
    258             isTerminated ? textLength + 1 : textLength);
    259  }
    260 }
    261 
    262 UnicodeString::UnicodeString(char16_t *buff,
    263                             int32_t buffLength,
    264                             int32_t buffCapacity) {
    265  fUnion.fFields.fLengthAndFlags = kWritableAlias;
    266  if(buff == nullptr) {
    267    // treat as an empty string, do not alias
    268    setToEmpty();
    269  } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
    270    setToBogus();
    271  } else {
    272    if(buffLength == -1) {
    273      // fLength = u_strlen(buff); but do not look beyond buffCapacity
    274      const char16_t *p = buff, *limit = buff + buffCapacity;
    275      while(p != limit && *p != 0) {
    276        ++p;
    277      }
    278      buffLength = static_cast<int32_t>(p - buff);
    279    }
    280    setArray(buff, buffLength, buffCapacity);
    281  }
    282 }
    283 
    284 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
    285  fUnion.fFields.fLengthAndFlags = kShortString;
    286  if(src==nullptr) {
    287    // treat as an empty string
    288  } else {
    289    if(length<0) {
    290      length = static_cast<int32_t>(uprv_strlen(src));
    291    }
    292    if(cloneArrayIfNeeded(length, length, false)) {
    293      u_charsToUChars(src, getArrayStart(), length);
    294      setLength(length);
    295    } else {
    296      setToBogus();
    297    }
    298  }
    299 }
    300 
    301 UnicodeString UnicodeString::readOnlyAliasFromU16StringView(std::u16string_view text) {
    302  UnicodeString result;
    303  if (text.length() <= INT32_MAX) {
    304    result.setTo(false, text.data(), static_cast<int32_t>(text.length()));
    305  } else {
    306    result.setToBogus();
    307  }
    308  return result;
    309 }
    310 
    311 UnicodeString UnicodeString::readOnlyAliasFromUnicodeString(const UnicodeString &text) {
    312  UnicodeString result;
    313  if (text.isBogus()) {
    314    result.setToBogus();
    315  } else {
    316    result.setTo(false, text.getBuffer(), text.length());
    317  }
    318  return result;
    319 }
    320 
    321 #if U_CHARSET_IS_UTF8
    322 
    323 UnicodeString::UnicodeString(const char *codepageData) {
    324  fUnion.fFields.fLengthAndFlags = kShortString;
    325  if (codepageData != nullptr) {
    326    setToUTF8(codepageData);
    327  }
    328 }
    329 
    330 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) {
    331  fUnion.fFields.fLengthAndFlags = kShortString;
    332  // if there's nothing to convert, do nothing
    333  if (codepageData == nullptr || dataLength == 0 || dataLength < -1) {
    334    return;
    335  }
    336  if(dataLength == -1) {
    337    dataLength = static_cast<int32_t>(uprv_strlen(codepageData));
    338  }
    339  setToUTF8(StringPiece(codepageData, dataLength));
    340 }
    341 
    342 // else see unistr_cnv.cpp
    343 #endif
    344 
    345 UnicodeString::UnicodeString(const UnicodeString& that) {
    346  fUnion.fFields.fLengthAndFlags = kShortString;
    347  copyFrom(that);
    348 }
    349 
    350 UnicodeString::UnicodeString(UnicodeString &&src) noexcept {
    351  copyFieldsFrom(src, true);
    352 }
    353 
    354 UnicodeString::UnicodeString(const UnicodeString& that,
    355                             int32_t srcStart) {
    356  fUnion.fFields.fLengthAndFlags = kShortString;
    357  setTo(that, srcStart);
    358 }
    359 
    360 UnicodeString::UnicodeString(const UnicodeString& that,
    361                             int32_t srcStart,
    362                             int32_t srcLength) {
    363  fUnion.fFields.fLengthAndFlags = kShortString;
    364  setTo(that, srcStart, srcLength);
    365 }
    366 
    367 // Replaceable base class clone() default implementation, does not clone
    368 Replaceable *
    369 Replaceable::clone() const {
    370  return nullptr;
    371 }
    372 
    373 // UnicodeString overrides clone() with a real implementation
    374 UnicodeString *
    375 UnicodeString::clone() const {
    376  LocalPointer<UnicodeString> clonedString(new UnicodeString(*this));
    377  return clonedString.isValid() && !clonedString->isBogus() ? clonedString.orphan() : nullptr;
    378 }
    379 
    380 //========================================
    381 // array allocation
    382 //========================================
    383 
    384 namespace {
    385 
    386 const int32_t kGrowSize = 128;
    387 
    388 // The number of bytes for one int32_t reference counter and capacity UChars
    389 // must fit into a 32-bit size_t (at least when on a 32-bit platform).
    390 // We also add one for the NUL terminator, to avoid reallocation in getTerminatedBuffer(),
    391 // and round up to a multiple of 16 bytes.
    392 // This means that capacity must be at most (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5.
    393 // (With more complicated checks we could go up to 0x7ffffffd without rounding up,
    394 // but that does not seem worth it.)
    395 const int32_t kMaxCapacity = 0x7ffffff5;
    396 
    397 int32_t getGrowCapacity(int32_t newLength) {
    398  int32_t growSize = (newLength >> 2) + kGrowSize;
    399  if(growSize <= (kMaxCapacity - newLength)) {
    400    return newLength + growSize;
    401  } else {
    402    return kMaxCapacity;
    403  }
    404 }
    405 
    406 }  // namespace
    407 
    408 UBool
    409 UnicodeString::allocate(int32_t capacity) {
    410  if(capacity <= US_STACKBUF_SIZE) {
    411    fUnion.fFields.fLengthAndFlags = kShortString;
    412    return true;
    413  }
    414  if(capacity <= kMaxCapacity) {
    415    ++capacity;  // for the NUL
    416    // Switch to size_t which is unsigned so that we can allocate up to 4GB.
    417    // Reference counter + UChars.
    418    size_t numBytes = sizeof(int32_t) + static_cast<size_t>(capacity) * U_SIZEOF_UCHAR;
    419    // Round up to a multiple of 16.
    420    numBytes = (numBytes + 15) & ~15;
    421    int32_t* array = static_cast<int32_t*>(uprv_malloc(numBytes));
    422    if(array != nullptr) {
    423      // set initial refCount and point behind the refCount
    424      *array++ = 1;
    425      numBytes -= sizeof(int32_t);
    426 
    427      // have fArray point to the first char16_t
    428      fUnion.fFields.fArray = reinterpret_cast<char16_t*>(array);
    429      fUnion.fFields.fCapacity = static_cast<int32_t>(numBytes / U_SIZEOF_UCHAR);
    430      fUnion.fFields.fLengthAndFlags = kLongString;
    431      return true;
    432    }
    433  }
    434  fUnion.fFields.fLengthAndFlags = kIsBogus;
    435  fUnion.fFields.fArray = nullptr;
    436  fUnion.fFields.fCapacity = 0;
    437  return false;
    438 }
    439 
    440 //========================================
    441 // Destructor
    442 //========================================
    443 
    444 #ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
    445 static u_atomic_int32_t finalLengthCounts[0x400];  // UnicodeString::kMaxShortLength+1
    446 static u_atomic_int32_t beyondCount(0);
    447 
    448 U_CAPI void unistr_printLengths() {
    449  int32_t i;
    450  for(i = 0; i <= 59; ++i) {
    451    printf("%2d,  %9d\n", i, (int32_t)finalLengthCounts[i]);
    452  }
    453  int32_t beyond = beyondCount;
    454  for(; i < UPRV_LENGTHOF(finalLengthCounts); ++i) {
    455    beyond += finalLengthCounts[i];
    456  }
    457  printf(">59, %9d\n", beyond);
    458 }
    459 #endif
    460 
    461 UnicodeString::~UnicodeString()
    462 {
    463 #ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
    464  // Count lengths of strings at the end of their lifetime.
    465  // Useful for discussion of a desirable stack buffer size.
    466  // Count the contents length, not the optional NUL terminator nor further capacity.
    467  // Ignore open-buffer strings and strings which alias external storage.
    468  if((fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kReadonlyAlias|kWritableAlias)) == 0) {
    469    if(hasShortLength()) {
    470      umtx_atomic_inc(finalLengthCounts + getShortLength());
    471    } else {
    472      umtx_atomic_inc(&beyondCount);
    473    }
    474  }
    475 #endif
    476 
    477  releaseArray();
    478 }
    479 
    480 //========================================
    481 // Factory methods
    482 //========================================
    483 
    484 UnicodeString UnicodeString::fromUTF8(StringPiece utf8) {
    485  UnicodeString result;
    486  result.setToUTF8(utf8);
    487  return result;
    488 }
    489 
    490 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
    491  UnicodeString result;
    492  int32_t capacity;
    493  // Most UTF-32 strings will be BMP-only and result in a same-length
    494  // UTF-16 string. We overestimate the capacity just slightly,
    495  // just in case there are a few supplementary characters.
    496  if(length <= US_STACKBUF_SIZE) {
    497    capacity = US_STACKBUF_SIZE;
    498  } else {
    499    capacity = length + (length >> 4) + 4;
    500  }
    501  do {
    502    char16_t *utf16 = result.getBuffer(capacity);
    503    int32_t length16;
    504    UErrorCode errorCode = U_ZERO_ERROR;
    505    u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
    506        utf32, length,
    507        0xfffd,  // Substitution character.
    508        nullptr,    // Don't care about number of substitutions.
    509        &errorCode);
    510    result.releaseBuffer(length16);
    511    if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
    512      capacity = length16 + 1;  // +1 for the terminating NUL.
    513      continue;
    514    } else if(U_FAILURE(errorCode)) {
    515      result.setToBogus();
    516    }
    517    break;
    518  } while(true);
    519  return result;
    520 }
    521 
    522 //========================================
    523 // Assignment
    524 //========================================
    525 
    526 UnicodeString &
    527 UnicodeString::operator=(const UnicodeString &src) {
    528  return copyFrom(src);
    529 }
    530 
    531 UnicodeString &
    532 UnicodeString::fastCopyFrom(const UnicodeString &src) {
    533  return copyFrom(src, true);
    534 }
    535 
    536 UnicodeString &
    537 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
    538  // if assigning to ourselves, do nothing
    539  if(this == &src) {
    540    return *this;
    541  }
    542 
    543  // is the right side bogus?
    544  if(src.isBogus()) {
    545    setToBogus();
    546    return *this;
    547  }
    548 
    549  // delete the current contents
    550  releaseArray();
    551 
    552  if(src.isEmpty()) {
    553    // empty string - use the stack buffer
    554    setToEmpty();
    555    return *this;
    556  }
    557 
    558  // fLength>0 and not an "open" src.getBuffer(minCapacity)
    559  fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
    560  switch(src.fUnion.fFields.fLengthAndFlags & kAllStorageFlags) {
    561  case kShortString:
    562    // short string using the stack buffer, do the same
    563    uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
    564                getShortLength() * U_SIZEOF_UCHAR);
    565    break;
    566  case kLongString:
    567    // src uses a refCounted string buffer, use that buffer with refCount
    568    // src is const, use a cast - we don't actually change it
    569    const_cast<UnicodeString &>(src).addRef();
    570    // copy all fields, share the reference-counted buffer
    571    fUnion.fFields.fArray = src.fUnion.fFields.fArray;
    572    fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
    573    if(!hasShortLength()) {
    574      fUnion.fFields.fLength = src.fUnion.fFields.fLength;
    575    }
    576    break;
    577  case kReadonlyAlias:
    578    if(fastCopy) {
    579      // src is a readonly alias, do the same
    580      // -> maintain the readonly alias as such
    581      fUnion.fFields.fArray = src.fUnion.fFields.fArray;
    582      fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
    583      if(!hasShortLength()) {
    584        fUnion.fFields.fLength = src.fUnion.fFields.fLength;
    585      }
    586      break;
    587    }
    588    // else if(!fastCopy) fall through to case kWritableAlias
    589    // -> allocate a new buffer and copy the contents
    590    U_FALLTHROUGH;
    591  case kWritableAlias: {
    592    // src is a writable alias; we make a copy of that instead
    593    int32_t srcLength = src.length();
    594    if(allocate(srcLength)) {
    595      u_memcpy(getArrayStart(), src.getArrayStart(), srcLength);
    596      setLength(srcLength);
    597      break;
    598    }
    599    // if there is not enough memory, then fall through to setting to bogus
    600    U_FALLTHROUGH;
    601  }
    602  default:
    603    // if src is bogus, set ourselves to bogus
    604    // do not call setToBogus() here because fArray and flags are not consistent here
    605    fUnion.fFields.fLengthAndFlags = kIsBogus;
    606    fUnion.fFields.fArray = nullptr;
    607    fUnion.fFields.fCapacity = 0;
    608    break;
    609  }
    610 
    611  return *this;
    612 }
    613 
    614 UnicodeString &UnicodeString::operator=(UnicodeString &&src) noexcept {
    615  // No explicit check for self move assignment, consistent with standard library.
    616  // Self move assignment causes no crash nor leak but might make the object bogus.
    617  releaseArray();
    618  copyFieldsFrom(src, true);
    619  return *this;
    620 }
    621 
    622 // Same as move assignment except without memory management.
    623 void UnicodeString::copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) noexcept {
    624  int16_t lengthAndFlags = fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
    625  if(lengthAndFlags & kUsingStackBuffer) {
    626    // Short string using the stack buffer, copy the contents.
    627    // Check for self assignment to prevent "overlap in memcpy" warnings,
    628    // although it should be harmless to copy a buffer to itself exactly.
    629    if(this != &src) {
    630      uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
    631                  getShortLength() * U_SIZEOF_UCHAR);
    632    }
    633  } else {
    634    // In all other cases, copy all fields.
    635    fUnion.fFields.fArray = src.fUnion.fFields.fArray;
    636    fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
    637    if(!hasShortLength()) {
    638      fUnion.fFields.fLength = src.fUnion.fFields.fLength;
    639    }
    640    if(setSrcToBogus) {
    641      // Set src to bogus without releasing any memory.
    642      src.fUnion.fFields.fLengthAndFlags = kIsBogus;
    643      src.fUnion.fFields.fArray = nullptr;
    644      src.fUnion.fFields.fCapacity = 0;
    645    }
    646  }
    647 }
    648 
    649 void UnicodeString::swap(UnicodeString &other) noexcept {
    650  UnicodeString temp;  // Empty short string: Known not to need releaseArray().
    651  // Copy fields without resetting source values in between.
    652  temp.copyFieldsFrom(*this, false);
    653  this->copyFieldsFrom(other, false);
    654  other.copyFieldsFrom(temp, false);
    655  // Set temp to an empty string so that other's memory is not released twice.
    656  temp.fUnion.fFields.fLengthAndFlags = kShortString;
    657 }
    658 
    659 //========================================
    660 // Miscellaneous operations
    661 //========================================
    662 
    663 UnicodeString UnicodeString::unescape() const {
    664    UnicodeString result(length(), static_cast<UChar32>(0), static_cast<int32_t>(0)); // construct with capacity
    665    if (result.isBogus()) {
    666        return result;
    667    }
    668    const char16_t *array = getBuffer();
    669    int32_t len = length();
    670    int32_t prev = 0;
    671    for (int32_t i=0;;) {
    672        if (i == len) {
    673            result.append(array, prev, len - prev);
    674            break;
    675        }
    676        if (array[i++] == 0x5C /*'\\'*/) {
    677            result.append(array, prev, (i - 1) - prev);
    678            UChar32 c = unescapeAt(i); // advances i
    679            if (c < 0) {
    680                result.remove(); // return empty string
    681                break; // invalid escape sequence
    682            }
    683            result.append(c);
    684            prev = i;
    685        }
    686    }
    687    return result;
    688 }
    689 
    690 UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
    691    return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
    692 }
    693 
    694 //========================================
    695 // Read-only implementation
    696 //========================================
    697 UBool
    698 UnicodeString::doEquals(const char16_t *text, int32_t len) const {
    699  // Requires: this not bogus and have same lengths.
    700  // Byte-wise comparison works for equality regardless of endianness.
    701  return uprv_memcmp(getArrayStart(), text, len * U_SIZEOF_UCHAR) == 0;
    702 }
    703 
    704 UBool
    705 UnicodeString::doEqualsSubstring( int32_t start,
    706              int32_t length,
    707              const char16_t *srcChars,
    708              int32_t srcStart,
    709              int32_t srcLength) const
    710 {
    711  // compare illegal string values
    712  if(isBogus()) {
    713    return false;
    714  }
    715  
    716  // pin indices to legal values
    717  pinIndices(start, length);
    718 
    719  if(srcChars == nullptr) {
    720    // treat const char16_t *srcChars==nullptr as an empty string
    721    return length == 0 ? true : false;
    722  }
    723 
    724  // get the correct pointer
    725  const char16_t *chars = getArrayStart();
    726 
    727  chars += start;
    728  srcChars += srcStart;
    729 
    730  // get the srcLength if necessary
    731  if(srcLength < 0) {
    732    srcLength = u_strlen(srcChars + srcStart);
    733  }
    734 
    735  if (length != srcLength) {
    736    return false;
    737  }
    738 
    739  if(length == 0 || chars == srcChars) {
    740    return true;
    741  }
    742 
    743  return u_memcmp(chars, srcChars, srcLength) == 0;
    744 }
    745 
    746 int8_t
    747 UnicodeString::doCompare( int32_t start,
    748              int32_t length,
    749              const char16_t *srcChars,
    750              int32_t srcStart,
    751              int32_t srcLength) const
    752 {
    753  // compare illegal string values
    754  if(isBogus()) {
    755    return -1;
    756  }
    757  
    758  // pin indices to legal values
    759  pinIndices(start, length);
    760 
    761  if(srcChars == nullptr) {
    762    // treat const char16_t *srcChars==nullptr as an empty string
    763    return length == 0 ? 0 : 1;
    764  }
    765 
    766  // get the correct pointer
    767  const char16_t *chars = getArrayStart();
    768 
    769  chars += start;
    770  srcChars += srcStart;
    771 
    772  int32_t minLength;
    773  int8_t lengthResult;
    774 
    775  // get the srcLength if necessary
    776  if(srcLength < 0) {
    777    srcLength = u_strlen(srcChars + srcStart);
    778  }
    779 
    780  // are we comparing different lengths?
    781  if(length != srcLength) {
    782    if(length < srcLength) {
    783      minLength = length;
    784      lengthResult = -1;
    785    } else {
    786      minLength = srcLength;
    787      lengthResult = 1;
    788    }
    789  } else {
    790    minLength = length;
    791    lengthResult = 0;
    792  }
    793 
    794  /*
    795   * note that uprv_memcmp() returns an int but we return an int8_t;
    796   * we need to take care not to truncate the result -
    797   * one way to do this is to right-shift the value to
    798   * move the sign bit into the lower 8 bits and making sure that this
    799   * does not become 0 itself
    800   */
    801 
    802  if(minLength > 0 && chars != srcChars) {
    803    int32_t result;
    804 
    805 #   if U_IS_BIG_ENDIAN 
    806      // big-endian: byte comparison works
    807      result = uprv_memcmp(chars, srcChars, minLength * sizeof(char16_t));
    808      if(result != 0) {
    809        return (int8_t)(result >> 15 | 1);
    810      }
    811 #   else
    812      // little-endian: compare char16_t units
    813      do {
    814        result = static_cast<int32_t>(*(chars++)) - static_cast<int32_t>(*(srcChars++));
    815        if(result != 0) {
    816          return static_cast<int8_t>(result >> 15 | 1);
    817        }
    818      } while(--minLength > 0);
    819 #   endif
    820  }
    821  return lengthResult;
    822 }
    823 
    824 /* String compare in code point order - doCompare() compares in code unit order. */
    825 int8_t
    826 UnicodeString::doCompareCodePointOrder(int32_t start,
    827                                       int32_t length,
    828                                       const char16_t *srcChars,
    829                                       int32_t srcStart,
    830                                       int32_t srcLength) const
    831 {
    832  // compare illegal string values
    833  // treat const char16_t *srcChars==nullptr as an empty string
    834  if(isBogus()) {
    835    return -1;
    836  }
    837 
    838  // pin indices to legal values
    839  pinIndices(start, length);
    840 
    841  if(srcChars == nullptr) {
    842    srcStart = srcLength = 0;
    843  }
    844 
    845  int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=nullptr)?(srcChars + srcStart):nullptr, srcLength, false, true);
    846  /* translate the 32-bit result into an 8-bit one */
    847  if(diff!=0) {
    848    return static_cast<int8_t>(diff >> 15 | 1);
    849  } else {
    850    return 0;
    851  }
    852 }
    853 
    854 int32_t
    855 UnicodeString::getLength() const {
    856    return length();
    857 }
    858 
    859 char16_t
    860 UnicodeString::getCharAt(int32_t offset) const {
    861  return charAt(offset);
    862 }
    863 
    864 UChar32
    865 UnicodeString::getChar32At(int32_t offset) const {
    866  return char32At(offset);
    867 }
    868 
    869 UChar32
    870 UnicodeString::char32At(int32_t offset) const
    871 {
    872  int32_t len = length();
    873  if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(len)) {
    874    const char16_t *array = getArrayStart();
    875    UChar32 c;
    876    U16_GET(array, 0, offset, len, c);
    877    return c;
    878  } else {
    879    return kInvalidUChar;
    880  }
    881 }
    882 
    883 int32_t
    884 UnicodeString::getChar32Start(int32_t offset) const {
    885  if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(length())) {
    886    const char16_t *array = getArrayStart();
    887    U16_SET_CP_START(array, 0, offset);
    888    return offset;
    889  } else {
    890    return 0;
    891  }
    892 }
    893 
    894 int32_t
    895 UnicodeString::getChar32Limit(int32_t offset) const {
    896  int32_t len = length();
    897  if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(len)) {
    898    const char16_t *array = getArrayStart();
    899    U16_SET_CP_LIMIT(array, 0, offset, len);
    900    return offset;
    901  } else {
    902    return len;
    903  }
    904 }
    905 
    906 int32_t
    907 UnicodeString::countChar32(int32_t start, int32_t length) const {
    908  pinIndices(start, length);
    909  // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for nullptr
    910  return u_countChar32(getArrayStart()+start, length);
    911 }
    912 
    913 UBool
    914 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
    915  pinIndices(start, length);
    916  // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for nullptr
    917  return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
    918 }
    919 
    920 int32_t
    921 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
    922  // pin index
    923  int32_t len = length();
    924  if(index<0) {
    925    index=0;
    926  } else if(index>len) {
    927    index=len;
    928  }
    929 
    930  const char16_t *array = getArrayStart();
    931  if(delta>0) {
    932    U16_FWD_N(array, index, len, delta);
    933  } else {
    934    U16_BACK_N(array, 0, index, -delta);
    935  }
    936 
    937  return index;
    938 }
    939 
    940 void
    941 UnicodeString::doExtract(int32_t start,
    942             int32_t length,
    943             char16_t *dst,
    944             int32_t dstStart) const
    945 {
    946  // pin indices to legal values
    947  pinIndices(start, length);
    948 
    949  // do not copy anything if we alias dst itself
    950  const char16_t *array = getArrayStart();
    951  if(array + start != dst + dstStart) {
    952    us_arrayCopy(array, start, dst, dstStart, length);
    953  }
    954 }
    955 
    956 int32_t
    957 UnicodeString::extract(Char16Ptr dest, int32_t destCapacity,
    958                       UErrorCode &errorCode) const {
    959  int32_t len = length();
    960  if(U_SUCCESS(errorCode)) {
    961    if (isBogus() || destCapacity < 0 || (destCapacity > 0 && dest == nullptr)) {
    962      errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    963    } else {
    964      const char16_t *array = getArrayStart();
    965      if(len>0 && len<=destCapacity && array!=dest) {
    966        u_memcpy(dest, array, len);
    967      }
    968      return u_terminateUChars(dest, destCapacity, len, &errorCode);
    969    }
    970  }
    971 
    972  return len;
    973 }
    974 
    975 int32_t
    976 UnicodeString::extract(int32_t start,
    977                       int32_t length,
    978                       char *target,
    979                       int32_t targetCapacity,
    980                       enum EInvariant) const
    981 {
    982  // if the arguments are illegal, then do nothing
    983  if(targetCapacity < 0 || (targetCapacity > 0 && target == nullptr)) {
    984    return 0;
    985  }
    986 
    987  // pin the indices to legal values
    988  pinIndices(start, length);
    989 
    990  if(length <= targetCapacity) {
    991    u_UCharsToChars(getArrayStart() + start, target, length);
    992  }
    993  UErrorCode status = U_ZERO_ERROR;
    994  return u_terminateChars(target, targetCapacity, length, &status);
    995 }
    996 
    997 UnicodeString
    998 UnicodeString::tempSubString(int32_t start, int32_t len) const {
    999  pinIndices(start, len);
   1000  const char16_t *array = getBuffer();  // not getArrayStart() to check kIsBogus & kOpenGetBuffer
   1001  if(array==nullptr) {
   1002    array=fUnion.fStackFields.fBuffer;  // anything not nullptr because that would make an empty string
   1003    len=-2;  // bogus result string
   1004  }
   1005  return UnicodeString(false, array + start, len);
   1006 }
   1007 
   1008 int32_t
   1009 UnicodeString::toUTF8(int32_t start, int32_t len,
   1010                      char *target, int32_t capacity) const {
   1011  pinIndices(start, len);
   1012  int32_t length8;
   1013  UErrorCode errorCode = U_ZERO_ERROR;
   1014  u_strToUTF8WithSub(target, capacity, &length8,
   1015                     getBuffer() + start, len,
   1016                     0xFFFD,  // Standard substitution character.
   1017                     nullptr,    // Don't care about number of substitutions.
   1018                     &errorCode);
   1019  return length8;
   1020 }
   1021 
   1022 #if U_CHARSET_IS_UTF8
   1023 
   1024 int32_t
   1025 UnicodeString::extract(int32_t start, int32_t len,
   1026                       char *target, uint32_t dstSize) const {
   1027  // if the arguments are illegal, then do nothing
   1028  if (/*dstSize < 0 || */(dstSize > 0 && target == nullptr)) {
   1029    return 0;
   1030  }
   1031  return toUTF8(start, len, target, dstSize <= 0x7fffffff ? static_cast<int32_t>(dstSize) : 0x7fffffff);
   1032 }
   1033 
   1034 // else see unistr_cnv.cpp
   1035 #endif
   1036 
   1037 void 
   1038 UnicodeString::extractBetween(int32_t start,
   1039                  int32_t limit,
   1040                  UnicodeString& target) const {
   1041  pinIndex(start);
   1042  pinIndex(limit);
   1043  doExtract(start, limit - start, target);
   1044 }
   1045 
   1046 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
   1047 // as many bytes as the source has UChars.
   1048 // The "worst cases" are writing systems like Indic, Thai and CJK with
   1049 // 3:1 bytes:UChars.
   1050 void
   1051 UnicodeString::toUTF8(ByteSink &sink) const {
   1052  int32_t length16 = length();
   1053  if(length16 != 0) {
   1054    char stackBuffer[1024];
   1055    int32_t capacity = static_cast<int32_t>(sizeof(stackBuffer));
   1056    UBool utf8IsOwned = false;
   1057    char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
   1058                                      3*length16,
   1059                                      stackBuffer, capacity,
   1060                                      &capacity);
   1061    int32_t length8 = 0;
   1062    UErrorCode errorCode = U_ZERO_ERROR;
   1063    u_strToUTF8WithSub(utf8, capacity, &length8,
   1064                       getBuffer(), length16,
   1065                       0xFFFD,  // Standard substitution character.
   1066                       nullptr,    // Don't care about number of substitutions.
   1067                       &errorCode);
   1068    if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
   1069      utf8 = static_cast<char*>(uprv_malloc(length8));
   1070      if(utf8 != nullptr) {
   1071        utf8IsOwned = true;
   1072        errorCode = U_ZERO_ERROR;
   1073        u_strToUTF8WithSub(utf8, length8, &length8,
   1074                           getBuffer(), length16,
   1075                           0xFFFD,  // Standard substitution character.
   1076                           nullptr,    // Don't care about number of substitutions.
   1077                           &errorCode);
   1078      } else {
   1079        errorCode = U_MEMORY_ALLOCATION_ERROR;
   1080      }
   1081    }
   1082    if(U_SUCCESS(errorCode)) {
   1083      sink.Append(utf8, length8);
   1084      sink.Flush();
   1085    }
   1086    if(utf8IsOwned) {
   1087      uprv_free(utf8);
   1088    }
   1089  }
   1090 }
   1091 
   1092 int32_t
   1093 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
   1094  int32_t length32=0;
   1095  if(U_SUCCESS(errorCode)) {
   1096    // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
   1097    u_strToUTF32WithSub(utf32, capacity, &length32,
   1098        getBuffer(), length(),
   1099        0xfffd,  // Substitution character.
   1100        nullptr,    // Don't care about number of substitutions.
   1101        &errorCode);
   1102  }
   1103  return length32;
   1104 }
   1105 
   1106 int32_t 
   1107 UnicodeString::indexOf(const char16_t *srcChars,
   1108               int32_t srcStart,
   1109               int32_t srcLength,
   1110               int32_t start,
   1111               int32_t length) const
   1112 {
   1113  if (isBogus() || srcChars == nullptr || srcStart < 0 || srcLength == 0) {
   1114    return -1;
   1115  }
   1116 
   1117  // UnicodeString does not find empty substrings
   1118  if(srcLength < 0 && srcChars[srcStart] == 0) {
   1119    return -1;
   1120  }
   1121 
   1122  // get the indices within bounds
   1123  pinIndices(start, length);
   1124 
   1125  // find the first occurrence of the substring
   1126  const char16_t *array = getArrayStart();
   1127  const char16_t *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
   1128  if(match == nullptr) {
   1129    return -1;
   1130  } else {
   1131    return static_cast<int32_t>(match - array);
   1132  }
   1133 }
   1134 
   1135 int32_t
   1136 UnicodeString::doIndexOf(char16_t c,
   1137             int32_t start,
   1138             int32_t length) const
   1139 {
   1140  // pin indices
   1141  pinIndices(start, length);
   1142 
   1143  // find the first occurrence of c
   1144  const char16_t *array = getArrayStart();
   1145  const char16_t *match = u_memchr(array + start, c, length);
   1146  if(match == nullptr) {
   1147    return -1;
   1148  } else {
   1149    return static_cast<int32_t>(match - array);
   1150  }
   1151 }
   1152 
   1153 int32_t
   1154 UnicodeString::doIndexOf(UChar32 c,
   1155                         int32_t start,
   1156                         int32_t length) const {
   1157  // pin indices
   1158  pinIndices(start, length);
   1159 
   1160  // find the first occurrence of c
   1161  const char16_t *array = getArrayStart();
   1162  const char16_t *match = u_memchr32(array + start, c, length);
   1163  if(match == nullptr) {
   1164    return -1;
   1165  } else {
   1166    return static_cast<int32_t>(match - array);
   1167  }
   1168 }
   1169 
   1170 int32_t 
   1171 UnicodeString::lastIndexOf(const char16_t *srcChars,
   1172               int32_t srcStart,
   1173               int32_t srcLength,
   1174               int32_t start,
   1175               int32_t length) const
   1176 {
   1177  if (isBogus() || srcChars == nullptr || srcStart < 0 || srcLength == 0) {
   1178    return -1;
   1179  }
   1180 
   1181  // UnicodeString does not find empty substrings
   1182  if(srcLength < 0 && srcChars[srcStart] == 0) {
   1183    return -1;
   1184  }
   1185 
   1186  // get the indices within bounds
   1187  pinIndices(start, length);
   1188 
   1189  // find the last occurrence of the substring
   1190  const char16_t *array = getArrayStart();
   1191  const char16_t *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
   1192  if(match == nullptr) {
   1193    return -1;
   1194  } else {
   1195    return static_cast<int32_t>(match - array);
   1196  }
   1197 }
   1198 
   1199 int32_t
   1200 UnicodeString::doLastIndexOf(char16_t c,
   1201                 int32_t start,
   1202                 int32_t length) const
   1203 {
   1204  if(isBogus()) {
   1205    return -1;
   1206  }
   1207 
   1208  // pin indices
   1209  pinIndices(start, length);
   1210 
   1211  // find the last occurrence of c
   1212  const char16_t *array = getArrayStart();
   1213  const char16_t *match = u_memrchr(array + start, c, length);
   1214  if(match == nullptr) {
   1215    return -1;
   1216  } else {
   1217    return static_cast<int32_t>(match - array);
   1218  }
   1219 }
   1220 
   1221 int32_t
   1222 UnicodeString::doLastIndexOf(UChar32 c,
   1223                             int32_t start,
   1224                             int32_t length) const {
   1225  // pin indices
   1226  pinIndices(start, length);
   1227 
   1228  // find the last occurrence of c
   1229  const char16_t *array = getArrayStart();
   1230  const char16_t *match = u_memrchr32(array + start, c, length);
   1231  if(match == nullptr) {
   1232    return -1;
   1233  } else {
   1234    return static_cast<int32_t>(match - array);
   1235  }
   1236 }
   1237 
   1238 //========================================
   1239 // Write implementation
   1240 //========================================
   1241 
   1242 UnicodeString& 
   1243 UnicodeString::findAndReplace(int32_t start,
   1244                  int32_t length,
   1245                  const UnicodeString& oldText,
   1246                  int32_t oldStart,
   1247                  int32_t oldLength,
   1248                  const UnicodeString& newText,
   1249                  int32_t newStart,
   1250                  int32_t newLength)
   1251 {
   1252  if(isBogus() || oldText.isBogus() || newText.isBogus()) {
   1253    return *this;
   1254  }
   1255 
   1256  pinIndices(start, length);
   1257  oldText.pinIndices(oldStart, oldLength);
   1258  newText.pinIndices(newStart, newLength);
   1259 
   1260  if(oldLength == 0) {
   1261    return *this;
   1262  }
   1263 
   1264  while(length > 0 && length >= oldLength) {
   1265    int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
   1266    if(pos < 0) {
   1267      // no more oldText's here: done
   1268      break;
   1269    } else {
   1270      // we found oldText, replace it by newText and go beyond it
   1271      replace(pos, oldLength, newText, newStart, newLength);
   1272      length -= pos + oldLength - start;
   1273      start = pos + newLength;
   1274    }
   1275  }
   1276 
   1277  return *this;
   1278 }
   1279 
   1280 
   1281 void
   1282 UnicodeString::setToBogus()
   1283 {
   1284  releaseArray();
   1285 
   1286  fUnion.fFields.fLengthAndFlags = kIsBogus;
   1287  fUnion.fFields.fArray = nullptr;
   1288  fUnion.fFields.fCapacity = 0;
   1289 }
   1290 
   1291 // turn a bogus string into an empty one
   1292 void
   1293 UnicodeString::unBogus() {
   1294  if(fUnion.fFields.fLengthAndFlags & kIsBogus) {
   1295    setToEmpty();
   1296  }
   1297 }
   1298 
   1299 const char16_t *
   1300 UnicodeString::getTerminatedBuffer() {
   1301  if(!isWritable()) {
   1302    return nullptr;
   1303  }
   1304  char16_t *array = getArrayStart();
   1305  int32_t len = length();
   1306  if(len < getCapacity()) {
   1307    if(fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) {
   1308      // If len<capacity on a read-only alias, then array[len] is
   1309      // either the original NUL (if constructed with (true, s, length))
   1310      // or one of the original string contents characters (if later truncated),
   1311      // therefore we can assume that array[len] is initialized memory.
   1312      if(array[len] == 0) {
   1313        return array;
   1314      }
   1315    } else if(((fUnion.fFields.fLengthAndFlags & kRefCounted) == 0 || refCount() == 1)) {
   1316      // kRefCounted: Do not write the NUL if the buffer is shared.
   1317      // That is mostly safe, except when the length of one copy was modified
   1318      // without copy-on-write, e.g., via truncate(newLength) or remove().
   1319      // Then the NUL would be written into the middle of another copy's string.
   1320 
   1321      // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
   1322      // Do not test if there is a NUL already because it might be uninitialized memory.
   1323      // (That would be safe, but tools like valgrind & Purify would complain.)
   1324      array[len] = 0;
   1325      return array;
   1326    }
   1327  }
   1328  if(len<INT32_MAX && cloneArrayIfNeeded(len+1)) {
   1329    array = getArrayStart();
   1330    array[len] = 0;
   1331    return array;
   1332  } else {
   1333    return nullptr;
   1334  }
   1335 }
   1336 
   1337 // setTo() analogous to the readonly-aliasing constructor with the same signature
   1338 UnicodeString &
   1339 UnicodeString::setTo(UBool isTerminated,
   1340                     ConstChar16Ptr textPtr,
   1341                     int32_t textLength)
   1342 {
   1343  if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
   1344    // do not modify a string that has an "open" getBuffer(minCapacity)
   1345    return *this;
   1346  }
   1347 
   1348  const char16_t *text = textPtr;
   1349  if(text == nullptr) {
   1350    // treat as an empty string, do not alias
   1351    releaseArray();
   1352    setToEmpty();
   1353    return *this;
   1354  }
   1355 
   1356  if( textLength < -1 ||
   1357      (textLength == -1 && !isTerminated) ||
   1358      (textLength >= 0 && isTerminated && text[textLength] != 0)
   1359  ) {
   1360    setToBogus();
   1361    return *this;
   1362  }
   1363 
   1364  releaseArray();
   1365 
   1366  if(textLength == -1) {
   1367    // text is terminated, or else it would have failed the above test
   1368    textLength = u_strlen(text);
   1369  }
   1370  fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
   1371  setArray(const_cast<char16_t*>(text), textLength, isTerminated ? textLength + 1 : textLength);
   1372  return *this;
   1373 }
   1374 
   1375 // setTo() analogous to the writable-aliasing constructor with the same signature
   1376 UnicodeString &
   1377 UnicodeString::setTo(char16_t *buffer,
   1378                     int32_t buffLength,
   1379                     int32_t buffCapacity) {
   1380  if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
   1381    // do not modify a string that has an "open" getBuffer(minCapacity)
   1382    return *this;
   1383  }
   1384 
   1385  if(buffer == nullptr) {
   1386    // treat as an empty string, do not alias
   1387    releaseArray();
   1388    setToEmpty();
   1389    return *this;
   1390  }
   1391 
   1392  if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
   1393    setToBogus();
   1394    return *this;
   1395  } else if(buffLength == -1) {
   1396    // buffLength = u_strlen(buff); but do not look beyond buffCapacity
   1397    const char16_t *p = buffer, *limit = buffer + buffCapacity;
   1398    while(p != limit && *p != 0) {
   1399      ++p;
   1400    }
   1401    buffLength = static_cast<int32_t>(p - buffer);
   1402  }
   1403 
   1404  releaseArray();
   1405 
   1406  fUnion.fFields.fLengthAndFlags = kWritableAlias;
   1407  setArray(buffer, buffLength, buffCapacity);
   1408  return *this;
   1409 }
   1410 
   1411 UnicodeString &UnicodeString::setToUTF8(StringPiece utf8) {
   1412  unBogus();
   1413  int32_t length = utf8.length();
   1414  int32_t capacity;
   1415  // The UTF-16 string will be at most as long as the UTF-8 string.
   1416  if(length <= US_STACKBUF_SIZE) {
   1417    capacity = US_STACKBUF_SIZE;
   1418  } else {
   1419    capacity = length + 1;  // +1 for the terminating NUL.
   1420  }
   1421  char16_t *utf16 = getBuffer(capacity);
   1422  int32_t length16;
   1423  UErrorCode errorCode = U_ZERO_ERROR;
   1424  u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
   1425      utf8.data(), length,
   1426      0xfffd,  // Substitution character.
   1427      nullptr,    // Don't care about number of substitutions.
   1428      &errorCode);
   1429  releaseBuffer(length16);
   1430  if(U_FAILURE(errorCode)) {
   1431    setToBogus();
   1432  }
   1433  return *this;
   1434 }
   1435 
   1436 UnicodeString&
   1437 UnicodeString::setCharAt(int32_t offset,
   1438             char16_t c)
   1439 {
   1440  int32_t len = length();
   1441  if(cloneArrayIfNeeded() && len > 0) {
   1442    if(offset < 0) {
   1443      offset = 0;
   1444    } else if(offset >= len) {
   1445      offset = len - 1;
   1446    }
   1447 
   1448    getArrayStart()[offset] = c;
   1449  }
   1450  return *this;
   1451 }
   1452 
   1453 UnicodeString&
   1454 UnicodeString::replace(int32_t start,
   1455               int32_t _length,
   1456               UChar32 srcChar) {
   1457  char16_t buffer[U16_MAX_LENGTH];
   1458  int32_t count = 0;
   1459  UBool isError = false;
   1460  U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
   1461  // We test isError so that the compiler does not complain that we don't.
   1462  // If isError (srcChar is not a valid code point) then count==0 which means
   1463  // we remove the source segment rather than replacing it with srcChar.
   1464  return doReplace(start, _length, buffer, 0, isError ? 0 : count);
   1465 }
   1466 
   1467 UnicodeString&
   1468 UnicodeString::append(UChar32 srcChar) {
   1469  char16_t buffer[U16_MAX_LENGTH];
   1470  int32_t _length = 0;
   1471  UBool isError = false;
   1472  U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
   1473  // We test isError so that the compiler does not complain that we don't.
   1474  // If isError then _length==0 which turns the doAppend() into a no-op anyway.
   1475  return isError ? *this : doAppend(buffer, 0, _length);
   1476 }
   1477 
   1478 UnicodeString&
   1479 UnicodeString::doReplace( int32_t start,
   1480              int32_t length,
   1481              const UnicodeString& src,
   1482              int32_t srcStart,
   1483              int32_t srcLength)
   1484 {
   1485  // pin the indices to legal values
   1486  src.pinIndices(srcStart, srcLength);
   1487 
   1488  // get the characters from src
   1489  // and replace the range in ourselves with them
   1490  return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
   1491 }
   1492 
   1493 UnicodeString&
   1494 UnicodeString::doReplace(int32_t start,
   1495             int32_t length,
   1496             const char16_t *srcChars,
   1497             int32_t srcStart,
   1498             int32_t srcLength)
   1499 {
   1500  if(!isWritable()) {
   1501    return *this;
   1502  }
   1503 
   1504  int32_t oldLength = this->length();
   1505 
   1506  // optimize (read-only alias).remove(0, start) and .remove(start, end)
   1507  if((fUnion.fFields.fLengthAndFlags&kBufferIsReadonly) && srcLength == 0) {
   1508    if(start == 0) {
   1509      // remove prefix by adjusting the array pointer
   1510      pinIndex(length);
   1511      fUnion.fFields.fArray += length;
   1512      fUnion.fFields.fCapacity -= length;
   1513      setLength(oldLength - length);
   1514      return *this;
   1515    } else {
   1516      pinIndex(start);
   1517      if(length >= (oldLength - start)) {
   1518        // remove suffix by reducing the length (like truncate())
   1519        setLength(start);
   1520        fUnion.fFields.fCapacity = start;  // not NUL-terminated any more
   1521        return *this;
   1522      }
   1523    }
   1524  }
   1525 
   1526  if(start == oldLength) {
   1527    return doAppend(srcChars, srcStart, srcLength);
   1528  }
   1529 
   1530  if (srcChars == nullptr) {
   1531    srcLength = 0;
   1532  } else {
   1533    // Perform all remaining operations relative to srcChars + srcStart.
   1534    // From this point forward, do not use srcStart.
   1535    srcChars += srcStart;
   1536    if (srcLength < 0) {
   1537      // get the srcLength if necessary
   1538      srcLength = u_strlen(srcChars);
   1539    }
   1540  }
   1541 
   1542  // pin the indices to legal values
   1543  pinIndices(start, length);
   1544 
   1545  // Calculate the size of the string after the replace.
   1546  // Avoid int32_t overflow.
   1547  int32_t newLength = oldLength - length;
   1548  if(srcLength > (INT32_MAX - newLength)) {
   1549    setToBogus();
   1550    return *this;
   1551  }
   1552  newLength += srcLength;
   1553 
   1554  // Check for insertion into ourself
   1555  const char16_t *oldArray = getArrayStart();
   1556  if (isBufferWritable() &&
   1557      oldArray < srcChars + srcLength &&
   1558      srcChars < oldArray + oldLength) {
   1559    // Copy into a new UnicodeString and start over
   1560    UnicodeString copy(srcChars, srcLength);
   1561    if (copy.isBogus()) {
   1562      setToBogus();
   1563      return *this;
   1564    }
   1565    return doReplace(start, length, copy.getArrayStart(), 0, srcLength);
   1566  }
   1567 
   1568  // cloneArrayIfNeeded(doCopyArray=false) may change fArray but will not copy the current contents;
   1569  // therefore we need to keep the current fArray
   1570  char16_t oldStackBuffer[US_STACKBUF_SIZE];
   1571  if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
   1572    // copy the stack buffer contents because it will be overwritten with
   1573    // fUnion.fFields values
   1574    u_memcpy(oldStackBuffer, oldArray, oldLength);
   1575    oldArray = oldStackBuffer;
   1576  }
   1577 
   1578  // clone our array and allocate a bigger array if needed
   1579  int32_t *bufferToDelete = nullptr;
   1580  if(!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength),
   1581                         false, &bufferToDelete)
   1582  ) {
   1583    return *this;
   1584  }
   1585 
   1586  // now do the replace
   1587 
   1588  char16_t *newArray = getArrayStart();
   1589  if(newArray != oldArray) {
   1590    // if fArray changed, then we need to copy everything except what will change
   1591    us_arrayCopy(oldArray, 0, newArray, 0, start);
   1592    us_arrayCopy(oldArray, start + length,
   1593                 newArray, start + srcLength,
   1594                 oldLength - (start + length));
   1595  } else if(length != srcLength) {
   1596    // fArray did not change; copy only the portion that isn't changing, leaving a hole
   1597    us_arrayCopy(oldArray, start + length,
   1598                 newArray, start + srcLength,
   1599                 oldLength - (start + length));
   1600  }
   1601 
   1602  // now fill in the hole with the new string
   1603  us_arrayCopy(srcChars, 0, newArray, start, srcLength);
   1604 
   1605  setLength(newLength);
   1606 
   1607  // delayed delete in case srcChars == fArray when we started, and
   1608  // to keep oldArray alive for the above operations
   1609  if (bufferToDelete) {
   1610    uprv_free(bufferToDelete);
   1611  }
   1612 
   1613  return *this;
   1614 }
   1615 
   1616 UnicodeString&
   1617 UnicodeString::doReplace(int32_t start, int32_t length, std::u16string_view src) {
   1618  if (!isWritable()) {
   1619    return *this;
   1620  }
   1621  if (src.length() > INT32_MAX) {
   1622    setToBogus();
   1623    return *this;
   1624  }
   1625  return doReplace(start, length, src.data(), 0, static_cast<int32_t>(src.length()));
   1626 }
   1627 
   1628 // Versions of doReplace() only for append() variants.
   1629 // doReplace() and doAppend() optimize for different cases.
   1630 
   1631 UnicodeString&
   1632 UnicodeString::doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength) {
   1633  if(srcLength == 0) {
   1634    return *this;
   1635  }
   1636 
   1637  // pin the indices to legal values
   1638  src.pinIndices(srcStart, srcLength);
   1639  return doAppend(src.getArrayStart(), srcStart, srcLength);
   1640 }
   1641 
   1642 UnicodeString&
   1643 UnicodeString::doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) {
   1644  if(!isWritable() || srcLength == 0 || srcChars == nullptr) {
   1645    return *this;
   1646  }
   1647 
   1648  // Perform all remaining operations relative to srcChars + srcStart.
   1649  // From this point forward, do not use srcStart.
   1650  srcChars += srcStart;
   1651 
   1652  if(srcLength < 0) {
   1653    // get the srcLength if necessary
   1654    if((srcLength = u_strlen(srcChars)) == 0) {
   1655      return *this;
   1656    }
   1657  }
   1658 
   1659  int32_t oldLength = length();
   1660  int32_t newLength;
   1661 
   1662  if (srcLength <= getCapacity() - oldLength && isBufferWritable()) {
   1663    newLength = oldLength + srcLength;
   1664    // Faster than a memmove
   1665    if (srcLength <= 4) {
   1666      char16_t *arr = getArrayStart();
   1667      arr[oldLength] = srcChars[0];
   1668      if (srcLength > 1) arr[oldLength+1] = srcChars[1];
   1669      if (srcLength > 2) arr[oldLength+2] = srcChars[2];
   1670      if (srcLength > 3) arr[oldLength+3] = srcChars[3];
   1671      setLength(newLength);
   1672      return *this;
   1673    }
   1674  } else {
   1675    if (uprv_add32_overflow(oldLength, srcLength, &newLength)) {
   1676      setToBogus();
   1677      return *this;
   1678    }
   1679 
   1680    // Check for append onto ourself
   1681    const char16_t* oldArray = getArrayStart();
   1682    if (isBufferWritable() &&
   1683        oldArray < srcChars + srcLength &&
   1684        srcChars < oldArray + oldLength) {
   1685      // Copy into a new UnicodeString and start over
   1686      UnicodeString copy(srcChars, srcLength);
   1687      if (copy.isBogus()) {
   1688        setToBogus();
   1689        return *this;
   1690      }
   1691      return doAppend(copy.getArrayStart(), 0, srcLength);
   1692    }
   1693 
   1694    // optimize append() onto a large-enough, owned string
   1695    if (!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) {
   1696      return *this;
   1697    }
   1698  }
   1699 
   1700  char16_t *newArray = getArrayStart();
   1701  // Do not copy characters when
   1702  //   char16_t *buffer=str.getAppendBuffer(...);
   1703  // is followed by
   1704  //   str.append(buffer, length);
   1705  // or
   1706  //   str.appendString(buffer, length)
   1707  // or similar.
   1708  if(srcChars != newArray + oldLength) {
   1709    us_arrayCopy(srcChars, 0, newArray, oldLength, srcLength);
   1710  }
   1711  setLength(newLength);
   1712 
   1713  return *this;
   1714 }
   1715 
   1716 UnicodeString&
   1717 UnicodeString::doAppend(std::u16string_view src) {
   1718  if (!isWritable() || src.empty()) {
   1719    return *this;
   1720  }
   1721  if (src.length() > INT32_MAX) {
   1722    setToBogus();
   1723    return *this;
   1724  }
   1725  return doAppend(src.data(), 0, static_cast<int32_t>(src.length()));
   1726 }
   1727 
   1728 /**
   1729 * Replaceable API
   1730 */
   1731 void
   1732 UnicodeString::handleReplaceBetween(int32_t start,
   1733                                    int32_t limit,
   1734                                    const UnicodeString& text) {
   1735    replaceBetween(start, limit, text);
   1736 }
   1737 
   1738 /**
   1739 * Replaceable API
   1740 */
   1741 void 
   1742 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
   1743    if (limit <= start) {
   1744        return; // Nothing to do; avoid bogus malloc call
   1745    }
   1746    char16_t* text = static_cast<char16_t*>(uprv_malloc(sizeof(char16_t) * (limit - start)));
   1747    // Check to make sure text is not null.
   1748    if (text != nullptr) {
   1749     extractBetween(start, limit, text, 0);
   1750     insert(dest, text, 0, limit - start);    
   1751     uprv_free(text);
   1752    }
   1753 }
   1754 
   1755 /**
   1756 * Replaceable API
   1757 *
   1758 * NOTE: This is for the Replaceable class.  There is no rep.cpp,
   1759 * so we implement this function here.
   1760 */
   1761 UBool Replaceable::hasMetaData() const {
   1762    return true;
   1763 }
   1764 
   1765 /**
   1766 * Replaceable API
   1767 */
   1768 UBool UnicodeString::hasMetaData() const {
   1769    return false;
   1770 }
   1771 
   1772 UnicodeString&
   1773 UnicodeString::doReverse(int32_t start, int32_t length) {
   1774  if(length <= 1 || !cloneArrayIfNeeded()) {
   1775    return *this;
   1776  }
   1777 
   1778  // pin the indices to legal values
   1779  pinIndices(start, length);
   1780  if(length <= 1) {  // pinIndices() might have shrunk the length
   1781    return *this;
   1782  }
   1783 
   1784  char16_t *left = getArrayStart() + start;
   1785  char16_t *right = left + length - 1;  // -1 for inclusive boundary (length>=2)
   1786  char16_t swap;
   1787  UBool hasSupplementary = false;
   1788 
   1789  // Before the loop we know left<right because length>=2.
   1790  do {
   1791    hasSupplementary |= static_cast<UBool>(U16_IS_LEAD(swap = *left));
   1792    hasSupplementary |= static_cast<UBool>(U16_IS_LEAD(*left++ = *right));
   1793    *right-- = swap;
   1794  } while(left < right);
   1795  // Make sure to test the middle code unit of an odd-length string.
   1796  // Redundant if the length is even.
   1797  hasSupplementary |= static_cast<UBool>(U16_IS_LEAD(*left));
   1798 
   1799  /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
   1800  if(hasSupplementary) {
   1801    char16_t swap2;
   1802 
   1803    left = getArrayStart() + start;
   1804    right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
   1805    while(left < right) {
   1806      if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
   1807        *left++ = swap2;
   1808        *left++ = swap;
   1809      } else {
   1810        ++left;
   1811      }
   1812    }
   1813  }
   1814 
   1815  return *this;
   1816 }
   1817 
   1818 UBool 
   1819 UnicodeString::padLeading(int32_t targetLength,
   1820                          char16_t padChar)
   1821 {
   1822  int32_t oldLength = length();
   1823  if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
   1824    return false;
   1825  } else {
   1826    // move contents up by padding width
   1827    char16_t *array = getArrayStart();
   1828    int32_t start = targetLength - oldLength;
   1829    us_arrayCopy(array, 0, array, start, oldLength);
   1830 
   1831    // fill in padding character
   1832    while(--start >= 0) {
   1833      array[start] = padChar;
   1834    }
   1835    setLength(targetLength);
   1836    return true;
   1837  }
   1838 }
   1839 
   1840 UBool 
   1841 UnicodeString::padTrailing(int32_t targetLength,
   1842                           char16_t padChar)
   1843 {
   1844  int32_t oldLength = length();
   1845  if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
   1846    return false;
   1847  } else {
   1848    // fill in padding character
   1849    char16_t *array = getArrayStart();
   1850    int32_t length = targetLength;
   1851    while(--length >= oldLength) {
   1852      array[length] = padChar;
   1853    }
   1854    setLength(targetLength);
   1855    return true;
   1856  }
   1857 }
   1858 
   1859 //========================================
   1860 // Hashing
   1861 //========================================
   1862 int32_t
   1863 UnicodeString::doHashCode() const
   1864 {
   1865    /* Delegate hash computation to uhash.  This makes UnicodeString
   1866     * hashing consistent with char16_t* hashing.  */
   1867    int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
   1868    if (hashCode == kInvalidHashCode) {
   1869        hashCode = kEmptyHashCode;
   1870    }
   1871    return hashCode;
   1872 }
   1873 
   1874 //========================================
   1875 // External Buffer
   1876 //========================================
   1877 
   1878 char16_t *
   1879 UnicodeString::getBuffer(int32_t minCapacity) {
   1880  if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
   1881    fUnion.fFields.fLengthAndFlags|=kOpenGetBuffer;
   1882    setZeroLength();
   1883    return getArrayStart();
   1884  } else {
   1885    return nullptr;
   1886  }
   1887 }
   1888 
   1889 void
   1890 UnicodeString::releaseBuffer(int32_t newLength) {
   1891  if(fUnion.fFields.fLengthAndFlags&kOpenGetBuffer && newLength>=-1) {
   1892    // set the new fLength
   1893    int32_t capacity=getCapacity();
   1894    if(newLength==-1) {
   1895      // the new length is the string length, capped by fCapacity
   1896      const char16_t *array=getArrayStart(), *p=array, *limit=array+capacity;
   1897      while(p<limit && *p!=0) {
   1898        ++p;
   1899      }
   1900      newLength = static_cast<int32_t>(p - array);
   1901    } else if(newLength>capacity) {
   1902      newLength=capacity;
   1903    }
   1904    setLength(newLength);
   1905    fUnion.fFields.fLengthAndFlags&=~kOpenGetBuffer;
   1906  }
   1907 }
   1908 
   1909 //========================================
   1910 // Miscellaneous
   1911 //========================================
   1912 UBool
   1913 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
   1914                                  int32_t growCapacity,
   1915                                  UBool doCopyArray,
   1916                                  int32_t **pBufferToDelete,
   1917                                  UBool forceClone) {
   1918  // default parameters need to be static, therefore
   1919  // the defaults are -1 to have convenience defaults
   1920  if(newCapacity == -1) {
   1921    newCapacity = getCapacity();
   1922  }
   1923 
   1924  // while a getBuffer(minCapacity) is "open",
   1925  // prevent any modifications of the string by returning false here
   1926  // if the string is bogus, then only an assignment or similar can revive it
   1927  if(!isWritable()) {
   1928    return false;
   1929  }
   1930 
   1931  /*
   1932   * We need to make a copy of the array if
   1933   * the buffer is read-only, or
   1934   * the buffer is refCounted (shared), and refCount>1, or
   1935   * the buffer is too small.
   1936   * Return false if memory could not be allocated.
   1937   */
   1938  if(forceClone ||
   1939     fUnion.fFields.fLengthAndFlags & kBufferIsReadonly ||
   1940     (fUnion.fFields.fLengthAndFlags & kRefCounted && refCount() > 1) ||
   1941     newCapacity > getCapacity()
   1942  ) {
   1943    // check growCapacity for default value and use of the stack buffer
   1944    if(growCapacity < 0) {
   1945      growCapacity = newCapacity;
   1946    } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
   1947      growCapacity = US_STACKBUF_SIZE;
   1948    } else if(newCapacity > growCapacity) {
   1949      setToBogus();
   1950      return false;  // bad inputs
   1951    }
   1952    if(growCapacity > kMaxCapacity) {
   1953      setToBogus();
   1954      return false;
   1955    }
   1956 
   1957    // save old values
   1958    char16_t oldStackBuffer[US_STACKBUF_SIZE];
   1959    char16_t *oldArray;
   1960    int32_t oldLength = length();
   1961    int16_t flags = fUnion.fFields.fLengthAndFlags;
   1962 
   1963    if(flags&kUsingStackBuffer) {
   1964      U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */
   1965      if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
   1966        // copy the stack buffer contents because it will be overwritten with
   1967        // fUnion.fFields values
   1968        us_arrayCopy(fUnion.fStackFields.fBuffer, 0, oldStackBuffer, 0, oldLength);
   1969        oldArray = oldStackBuffer;
   1970      } else {
   1971        oldArray = nullptr; // no need to copy from the stack buffer to itself
   1972      }
   1973    } else {
   1974      oldArray = fUnion.fFields.fArray;
   1975      U_ASSERT(oldArray!=nullptr); /* when stack buffer is not used, oldArray must have a non-nullptr reference */
   1976    }
   1977 
   1978    // allocate a new array
   1979    if(allocate(growCapacity) ||
   1980       (newCapacity < growCapacity && allocate(newCapacity))
   1981    ) {
   1982      if(doCopyArray) {
   1983        // copy the contents
   1984        // do not copy more than what fits - it may be smaller than before
   1985        int32_t minLength = oldLength;
   1986        newCapacity = getCapacity();
   1987        if(newCapacity < minLength) {
   1988          minLength = newCapacity;
   1989        }
   1990        if(oldArray != nullptr) {
   1991          us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
   1992        }
   1993        setLength(minLength);
   1994      } else {
   1995        setZeroLength();
   1996      }
   1997 
   1998      // release the old array
   1999      if(flags & kRefCounted) {
   2000        // the array is refCounted; decrement and release if 0
   2001        u_atomic_int32_t* pRefCount = reinterpret_cast<u_atomic_int32_t*>(oldArray) - 1;
   2002        if(umtx_atomic_dec(pRefCount) == 0) {
   2003          if (pBufferToDelete == nullptr) {
   2004              // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
   2005              // is defined as volatile. (Volatile has useful non-standard behavior
   2006              //   with this compiler.)
   2007            uprv_free((void *)pRefCount);
   2008          } else {
   2009            // the caller requested to delete it himself
   2010            *pBufferToDelete = reinterpret_cast<int32_t*>(pRefCount);
   2011          }
   2012        }
   2013      }
   2014    } else {
   2015      // not enough memory for growCapacity and not even for the smaller newCapacity
   2016      // reset the old values for setToBogus() to release the array
   2017      if(!(flags&kUsingStackBuffer)) {
   2018        fUnion.fFields.fArray = oldArray;
   2019      }
   2020      fUnion.fFields.fLengthAndFlags = flags;
   2021      setToBogus();
   2022      return false;
   2023    }
   2024  }
   2025  return true;
   2026 }
   2027 
   2028 // UnicodeStringAppendable ------------------------------------------------- ***
   2029 
   2030 UnicodeStringAppendable::~UnicodeStringAppendable() {}
   2031 
   2032 UBool
   2033 UnicodeStringAppendable::appendCodeUnit(char16_t c) {
   2034  return str.doAppend(&c, 0, 1).isWritable();
   2035 }
   2036 
   2037 UBool
   2038 UnicodeStringAppendable::appendCodePoint(UChar32 c) {
   2039  char16_t buffer[U16_MAX_LENGTH];
   2040  int32_t cLength = 0;
   2041  UBool isError = false;
   2042  U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
   2043  return !isError && str.doAppend(buffer, 0, cLength).isWritable();
   2044 }
   2045 
   2046 UBool
   2047 UnicodeStringAppendable::appendString(const char16_t *s, int32_t length) {
   2048  return str.doAppend(s, 0, length).isWritable();
   2049 }
   2050 
   2051 UBool
   2052 UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
   2053  return str.cloneArrayIfNeeded(str.length() + appendCapacity);
   2054 }
   2055 
   2056 char16_t *
   2057 UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
   2058                                         int32_t desiredCapacityHint,
   2059                                         char16_t *scratch, int32_t scratchCapacity,
   2060                                         int32_t *resultCapacity) {
   2061  if(minCapacity < 1 || scratchCapacity < minCapacity) {
   2062    *resultCapacity = 0;
   2063    return nullptr;
   2064  }
   2065  int32_t oldLength = str.length();
   2066  if(minCapacity <= (kMaxCapacity - oldLength) &&
   2067      desiredCapacityHint <= (kMaxCapacity - oldLength) &&
   2068      str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
   2069    *resultCapacity = str.getCapacity() - oldLength;
   2070    return str.getArrayStart() + oldLength;
   2071  }
   2072  *resultCapacity = scratchCapacity;
   2073  return scratch;
   2074 }
   2075 
   2076 U_NAMESPACE_END
   2077 
   2078 U_NAMESPACE_USE
   2079 
   2080 U_CAPI int32_t U_EXPORT2
   2081 uhash_hashUnicodeString(const UElement key) {
   2082    const UnicodeString *str = (const UnicodeString*) key.pointer;
   2083    return (str == nullptr) ? 0 : str->hashCode();
   2084 }
   2085 
   2086 // Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
   2087 // does not depend on hashtable code.
   2088 U_CAPI UBool U_EXPORT2
   2089 uhash_compareUnicodeString(const UElement key1, const UElement key2) {
   2090    const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
   2091    const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
   2092    if (str1 == str2) {
   2093        return true;
   2094    }
   2095    if (str1 == nullptr || str2 == nullptr) {
   2096        return false;
   2097    }
   2098    return *str1 == *str2;
   2099 }
   2100 
   2101 #ifdef U_STATIC_IMPLEMENTATION
   2102 /*
   2103 This should never be called. It is defined here to make sure that the
   2104 virtual vector deleting destructor is defined within unistr.cpp.
   2105 The vector deleting destructor is already a part of UObject,
   2106 but defining it here makes sure that it is included with this object file.
   2107 This makes sure that static library dependencies are kept to a minimum.
   2108 */
   2109 #if defined(__clang__) || U_GCC_MAJOR_MINOR >= 1100
   2110 #pragma GCC diagnostic push
   2111 #pragma GCC diagnostic ignored "-Wunused-function"
   2112 static void uprv_UnicodeStringDummy() {
   2113    delete [] (new UnicodeString[2]);
   2114 }
   2115 #pragma GCC diagnostic pop
   2116 #endif
   2117 #endif