tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

formatted_string_builder.cpp (15395B)


      1 // © 2017 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #if !UCONFIG_NO_FORMATTING
      7 
      8 #include "formatted_string_builder.h"
      9 #include "putilimp.h"
     10 #include "unicode/ustring.h"
     11 #include "unicode/utf16.h"
     12 #include "unicode/unum.h" // for UNumberFormatFields literals
     13 
     14 namespace {
     15 
     16 // A version of uprv_memcpy that checks for length 0.
     17 // By default, uprv_memcpy requires a length of at least 1.
     18 inline void uprv_memcpy2(void* dest, const void* src, size_t len) {
     19    if (len > 0) {
     20        uprv_memcpy(dest, src, len);
     21    }
     22 }
     23 
     24 // A version of uprv_memmove that checks for length 0.
     25 // By default, uprv_memmove requires a length of at least 1.
     26 inline void uprv_memmove2(void* dest, const void* src, size_t len) {
     27    if (len > 0) {
     28        uprv_memmove(dest, src, len);
     29    }
     30 }
     31 
     32 } // namespace
     33 
     34 
     35 U_NAMESPACE_BEGIN
     36 
     37 FormattedStringBuilder::FormattedStringBuilder() {
     38 #if U_DEBUG
     39    // Initializing the memory to non-zero helps catch some bugs that involve
     40    // reading from an improperly terminated string.
     41    for (int32_t i=0; i<getCapacity(); i++) {
     42        getCharPtr()[i] = 1;
     43    }
     44 #endif
     45 }
     46 
     47 FormattedStringBuilder::~FormattedStringBuilder() {
     48    if (fUsingHeap) {
     49        uprv_free(fChars.heap.ptr);
     50        uprv_free(fFields.heap.ptr);
     51    }
     52 }
     53 
     54 FormattedStringBuilder::FormattedStringBuilder(const FormattedStringBuilder &other) {
     55    *this = other;
     56 }
     57 
     58 FormattedStringBuilder &FormattedStringBuilder::operator=(const FormattedStringBuilder &other) {
     59    // Check for self-assignment
     60    if (this == &other) {
     61        return *this;
     62    }
     63 
     64    // Continue with deallocation and copying
     65    if (fUsingHeap) {
     66        uprv_free(fChars.heap.ptr);
     67        uprv_free(fFields.heap.ptr);
     68        fUsingHeap = false;
     69    }
     70 
     71    int32_t capacity = other.getCapacity();
     72    if (capacity > DEFAULT_CAPACITY) {
     73        // FIXME: uprv_malloc
     74        // C++ note: malloc appears in two places: here and in prepareForInsertHelper.
     75        auto* newChars = static_cast<char16_t*>(uprv_malloc(sizeof(char16_t) * capacity));
     76        auto* newFields = static_cast<Field*>(uprv_malloc(sizeof(Field) * capacity));
     77        if (newChars == nullptr || newFields == nullptr) {
     78            // UErrorCode is not available; fail silently.
     79            uprv_free(newChars);
     80            uprv_free(newFields);
     81            *this = FormattedStringBuilder();  // can't fail
     82            return *this;
     83        }
     84 
     85        fUsingHeap = true;
     86        fChars.heap.capacity = capacity;
     87        fChars.heap.ptr = newChars;
     88        fFields.heap.capacity = capacity;
     89        fFields.heap.ptr = newFields;
     90    }
     91 
     92    uprv_memcpy2(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * capacity);
     93    uprv_memcpy2(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * capacity);
     94 
     95    fZero = other.fZero;
     96    fLength = other.fLength;
     97    return *this;
     98 }
     99 
    100 int32_t FormattedStringBuilder::length() const {
    101    return fLength;
    102 }
    103 
    104 int32_t FormattedStringBuilder::codePointCount() const {
    105    return u_countChar32(getCharPtr() + fZero, fLength);
    106 }
    107 
    108 UChar32 FormattedStringBuilder::getFirstCodePoint() const {
    109    if (fLength == 0) {
    110        return -1;
    111    }
    112    UChar32 cp;
    113    U16_GET(getCharPtr() + fZero, 0, 0, fLength, cp);
    114    return cp;
    115 }
    116 
    117 UChar32 FormattedStringBuilder::getLastCodePoint() const {
    118    if (fLength == 0) {
    119        return -1;
    120    }
    121    int32_t offset = fLength;
    122    U16_BACK_1(getCharPtr() + fZero, 0, offset);
    123    UChar32 cp;
    124    U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
    125    return cp;
    126 }
    127 
    128 UChar32 FormattedStringBuilder::codePointAt(int32_t index) const {
    129    UChar32 cp;
    130    U16_GET(getCharPtr() + fZero, 0, index, fLength, cp);
    131    return cp;
    132 }
    133 
    134 UChar32 FormattedStringBuilder::codePointBefore(int32_t index) const {
    135    int32_t offset = index;
    136    U16_BACK_1(getCharPtr() + fZero, 0, offset);
    137    UChar32 cp;
    138    U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
    139    return cp;
    140 }
    141 
    142 FormattedStringBuilder &FormattedStringBuilder::clear() {
    143    // TODO: Reset the heap here?
    144    fZero = getCapacity() / 2;
    145    fLength = 0;
    146    return *this;
    147 }
    148 
    149 int32_t
    150 FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) {
    151    int32_t count = U16_LENGTH(codePoint);
    152    int32_t position = prepareForInsert(index, count, status);
    153    if (U_FAILURE(status)) {
    154        return count;
    155    }
    156    auto* charPtr = getCharPtr();
    157    auto* fieldPtr = getFieldPtr();
    158    if (count == 1) {
    159        charPtr[position] = static_cast<char16_t>(codePoint);
    160        fieldPtr[position] = field;
    161    } else {
    162        charPtr[position] = U16_LEAD(codePoint);
    163        charPtr[position + 1] = U16_TRAIL(codePoint);
    164        fieldPtr[position] = fieldPtr[position + 1] = field;
    165    }
    166    return count;
    167 }
    168 
    169 int32_t FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
    170                                    UErrorCode &status) {
    171    if (unistr.length() == 0) {
    172        // Nothing to insert.
    173        return 0;
    174    } else if (unistr.length() == 1) {
    175        // Fast path: insert using insertCodePoint.
    176        return insertCodePoint(index, unistr.charAt(0), field, status);
    177    } else {
    178        return insert(index, unistr, 0, unistr.length(), field, status);
    179    }
    180 }
    181 
    182 int32_t
    183 FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
    184                            Field field, UErrorCode &status) {
    185    int32_t count = end - start;
    186    int32_t position = prepareForInsert(index, count, status);
    187    if (U_FAILURE(status)) {
    188        return count;
    189    }
    190    for (int32_t i = 0; i < count; i++) {
    191        getCharPtr()[position + i] = unistr.charAt(start + i);
    192        getFieldPtr()[position + i] = field;
    193    }
    194    return count;
    195 }
    196 
    197 int32_t
    198 FormattedStringBuilder::splice(int32_t startThis, int32_t endThis,  const UnicodeString &unistr,
    199                            int32_t startOther, int32_t endOther, Field field, UErrorCode& status) {
    200    int32_t thisLength = endThis - startThis;
    201    int32_t otherLength = endOther - startOther;
    202    int32_t count = otherLength - thisLength;
    203    if (U_FAILURE(status)) {
    204        return count;
    205    }
    206    int32_t position;
    207    if (count > 0) {
    208        // Overall, chars need to be added.
    209        position = prepareForInsert(startThis, count, status);
    210    } else {
    211        // Overall, chars need to be removed or kept the same.
    212        position = remove(startThis, -count);
    213    }
    214    if (U_FAILURE(status)) {
    215        return count;
    216    }
    217    for (int32_t i = 0; i < otherLength; i++) {
    218        getCharPtr()[position + i] = unistr.charAt(startOther + i);
    219        getFieldPtr()[position + i] = field;
    220    }
    221    return count;
    222 }
    223 
    224 int32_t FormattedStringBuilder::append(const FormattedStringBuilder &other, UErrorCode &status) {
    225    return insert(fLength, other, status);
    226 }
    227 
    228 int32_t
    229 FormattedStringBuilder::insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status) {
    230    if (U_FAILURE(status)) {
    231        return 0;
    232    }
    233    if (this == &other) {
    234        status = U_ILLEGAL_ARGUMENT_ERROR;
    235        return 0;
    236    }
    237    int32_t count = other.fLength;
    238    if (count == 0) {
    239        // Nothing to insert.
    240        return 0;
    241    }
    242    int32_t position = prepareForInsert(index, count, status);
    243    if (U_FAILURE(status)) {
    244        return count;
    245    }
    246    for (int32_t i = 0; i < count; i++) {
    247        getCharPtr()[position + i] = other.charAt(i);
    248        getFieldPtr()[position + i] = other.fieldAt(i);
    249    }
    250    return count;
    251 }
    252 
    253 void FormattedStringBuilder::writeTerminator(UErrorCode& status) {
    254    int32_t position = prepareForInsert(fLength, 1, status);
    255    if (U_FAILURE(status)) {
    256        return;
    257    }
    258    getCharPtr()[position] = 0;
    259    getFieldPtr()[position] = kUndefinedField;
    260    fLength--;
    261 }
    262 
    263 int32_t FormattedStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) {
    264    U_ASSERT(index >= 0);
    265    U_ASSERT(index <= fLength);
    266    U_ASSERT(count >= 0);
    267    U_ASSERT(fZero >= 0);
    268    U_ASSERT(fLength >= 0);
    269    U_ASSERT(getCapacity() - fZero >= fLength);
    270    if (U_FAILURE(status)) {
    271        return count;
    272    }
    273    if (index == 0 && fZero - count >= 0) {
    274        // Append to start
    275        fZero -= count;
    276        fLength += count;
    277        return fZero;
    278    } else if (index == fLength && count <= getCapacity() - fZero - fLength) {
    279        // Append to end
    280        fLength += count;
    281        return fZero + fLength - count;
    282    } else {
    283        // Move chars around and/or allocate more space
    284        return prepareForInsertHelper(index, count, status);
    285    }
    286 }
    287 
    288 int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) {
    289    int32_t oldCapacity = getCapacity();
    290    int32_t oldZero = fZero;
    291    char16_t *oldChars = getCharPtr();
    292    Field *oldFields = getFieldPtr();
    293    int32_t newLength;
    294    if (uprv_add32_overflow(fLength, count, &newLength)) {
    295        status = U_INPUT_TOO_LONG_ERROR;
    296        return -1;
    297    }
    298    int32_t newZero;
    299    if (newLength > oldCapacity) {
    300        if (newLength > INT32_MAX / 2) {
    301            // We do not support more than 1G char16_t in this code because
    302            // dealing with >2G *bytes* can cause subtle bugs.
    303            status = U_INPUT_TOO_LONG_ERROR;
    304            return -1;
    305        }
    306        // Keep newCapacity also to at most 1G char16_t.
    307        int32_t newCapacity = newLength * 2;
    308        newZero = (newCapacity - newLength) / 2;
    309 
    310        // C++ note: malloc appears in two places: here and in the assignment operator.
    311        auto* newChars =
    312            static_cast<char16_t*>(uprv_malloc(sizeof(char16_t) * static_cast<size_t>(newCapacity)));
    313        auto* newFields =
    314            static_cast<Field*>(uprv_malloc(sizeof(Field) * static_cast<size_t>(newCapacity)));
    315        if (newChars == nullptr || newFields == nullptr) {
    316            uprv_free(newChars);
    317            uprv_free(newFields);
    318            status = U_MEMORY_ALLOCATION_ERROR;
    319            return -1;
    320        }
    321 
    322        // First copy the prefix and then the suffix, leaving room for the new chars that the
    323        // caller wants to insert.
    324        // C++ note: memcpy is OK because the src and dest do not overlap.
    325        uprv_memcpy2(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index);
    326        uprv_memcpy2(newChars + newZero + index + count,
    327                oldChars + oldZero + index,
    328                sizeof(char16_t) * (fLength - index));
    329        uprv_memcpy2(newFields + newZero, oldFields + oldZero, sizeof(Field) * index);
    330        uprv_memcpy2(newFields + newZero + index + count,
    331                oldFields + oldZero + index,
    332                sizeof(Field) * (fLength - index));
    333 
    334        if (fUsingHeap) {
    335            uprv_free(oldChars);
    336            uprv_free(oldFields);
    337        }
    338        fUsingHeap = true;
    339        fChars.heap.ptr = newChars;
    340        fChars.heap.capacity = newCapacity;
    341        fFields.heap.ptr = newFields;
    342        fFields.heap.capacity = newCapacity;
    343    } else {
    344        newZero = (oldCapacity - newLength) / 2;
    345 
    346        // C++ note: memmove is required because src and dest may overlap.
    347        // First copy the entire string to the location of the prefix, and then move the suffix
    348        // to make room for the new chars that the caller wants to insert.
    349        uprv_memmove2(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength);
    350        uprv_memmove2(oldChars + newZero + index + count,
    351                oldChars + newZero + index,
    352                sizeof(char16_t) * (fLength - index));
    353        uprv_memmove2(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength);
    354        uprv_memmove2(oldFields + newZero + index + count,
    355                oldFields + newZero + index,
    356                sizeof(Field) * (fLength - index));
    357    }
    358    fZero = newZero;
    359    fLength = newLength;
    360    return fZero + index;
    361 }
    362 
    363 int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) {
    364     U_ASSERT(0 <= index);
    365     U_ASSERT(index <= fLength);
    366     U_ASSERT(count <= (fLength - index));
    367     U_ASSERT(index <= getCapacity() - fZero);
    368 
    369    int32_t position = index + fZero;
    370    // TODO: Reset the heap here?  (If the string after removal can fit on stack?)
    371    uprv_memmove2(getCharPtr() + position,
    372            getCharPtr() + position + count,
    373            sizeof(char16_t) * (fLength - index - count));
    374    uprv_memmove2(getFieldPtr() + position,
    375            getFieldPtr() + position + count,
    376            sizeof(Field) * (fLength - index - count));
    377    fLength -= count;
    378    return position;
    379 }
    380 
    381 UnicodeString FormattedStringBuilder::toUnicodeString() const {
    382    return UnicodeString(getCharPtr() + fZero, fLength);
    383 }
    384 
    385 UnicodeString FormattedStringBuilder::toTempUnicodeString() const {
    386    // Readonly-alias constructor:
    387    return UnicodeString(false, getCharPtr() + fZero, fLength);
    388 }
    389 
    390 UnicodeString FormattedStringBuilder::toDebugString() const {
    391    UnicodeString sb;
    392    sb.append(u"<FormattedStringBuilder [", -1);
    393    sb.append(toUnicodeString());
    394    sb.append(u"] [", -1);
    395    for (int i = 0; i < fLength; i++) {
    396        if (fieldAt(i) == kUndefinedField) {
    397            sb.append(u'n');
    398        } else if (fieldAt(i).getCategory() == UFIELD_CATEGORY_NUMBER) {
    399            char16_t c;
    400            switch (fieldAt(i).getField()) {
    401                case UNUM_SIGN_FIELD:
    402                    c = u'-';
    403                    break;
    404                case UNUM_INTEGER_FIELD:
    405                    c = u'i';
    406                    break;
    407                case UNUM_FRACTION_FIELD:
    408                    c = u'f';
    409                    break;
    410                case UNUM_EXPONENT_FIELD:
    411                    c = u'e';
    412                    break;
    413                case UNUM_EXPONENT_SIGN_FIELD:
    414                    c = u'+';
    415                    break;
    416                case UNUM_EXPONENT_SYMBOL_FIELD:
    417                    c = u'E';
    418                    break;
    419                case UNUM_DECIMAL_SEPARATOR_FIELD:
    420                    c = u'.';
    421                    break;
    422                case UNUM_GROUPING_SEPARATOR_FIELD:
    423                    c = u',';
    424                    break;
    425                case UNUM_PERCENT_FIELD:
    426                    c = u'%';
    427                    break;
    428                case UNUM_PERMILL_FIELD:
    429                    c = u'‰';
    430                    break;
    431                case UNUM_CURRENCY_FIELD:
    432                    c = u'$';
    433                    break;
    434                default:
    435                    c = u'0' + fieldAt(i).getField();
    436                    break;
    437            }
    438            sb.append(c);
    439        } else {
    440            sb.append(u'0' + fieldAt(i).getCategory());
    441        }
    442    }
    443    sb.append(u"]>", -1);
    444    return sb;
    445 }
    446 
    447 const char16_t *FormattedStringBuilder::chars() const {
    448    return getCharPtr() + fZero;
    449 }
    450 
    451 bool FormattedStringBuilder::contentEquals(const FormattedStringBuilder &other) const {
    452    if (fLength != other.fLength) {
    453        return false;
    454    }
    455    for (int32_t i = 0; i < fLength; i++) {
    456        if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) {
    457            return false;
    458        }
    459    }
    460    return true;
    461 }
    462 
    463 bool FormattedStringBuilder::containsField(Field field) const {
    464    for (int32_t i = 0; i < fLength; i++) {
    465        if (field == fieldAt(i)) {
    466            return true;
    467        }
    468    }
    469    return false;
    470 }
    471 
    472 U_NAMESPACE_END
    473 
    474 #endif /* #if !UCONFIG_NO_FORMATTING */