tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

formatted_string_builder.h (8947B)


      1 // © 2017 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #if !UCONFIG_NO_FORMATTING
      7 #ifndef __NUMBER_STRINGBUILDER_H__
      8 #define __NUMBER_STRINGBUILDER_H__
      9 
     10 
     11 #include <cstdint>
     12 #include <type_traits>
     13 
     14 #include "cstring.h"
     15 #include "uassert.h"
     16 #include "fphdlimp.h"
     17 
     18 U_NAMESPACE_BEGIN
     19 
     20 class FormattedValueStringBuilderImpl;
     21 
     22 /**
     23 * A StringBuilder optimized for formatting. It implements the following key
     24 * features beyond a UnicodeString:
     25 *
     26 * <ol>
     27 * <li>Efficient prepend as well as append.
     28 * <li>Keeps track of Fields in an efficient manner.
     29 * </ol>
     30 *
     31 * See also FormattedValueStringBuilderImpl.
     32 *
     33 * @author sffc (Shane Carr)
     34 */
     35 class U_I18N_API FormattedStringBuilder : public UMemory {
     36  private:
     37    static const int32_t DEFAULT_CAPACITY = 40;
     38 
     39    template<typename T>
     40    union ValueOrHeapArray {
     41        T value[DEFAULT_CAPACITY];
     42        struct {
     43            T *ptr;
     44            int32_t capacity;
     45        } heap;
     46    };
     47 
     48  public:
     49    FormattedStringBuilder();
     50 
     51    ~FormattedStringBuilder();
     52 
     53    FormattedStringBuilder(const FormattedStringBuilder &other);
     54 
     55    // Convention: bottom 4 bits for field, top 4 bits for field category.
     56    // Field category 0 implies the number category so that the number field
     57    // literals can be directly passed as a Field type.
     58    // Exported as U_I18N_API so it can be used by other exports on Windows.
     59    struct U_I18N_API Field {
     60        uint8_t bits;
     61 
     62        Field() = default;
     63        constexpr Field(uint8_t category, uint8_t field);
     64 
     65        inline UFieldCategory getCategory() const;
     66        inline int32_t getField() const;
     67        inline bool isNumeric() const;
     68        inline bool isUndefined() const;
     69        inline bool operator==(const Field& other) const;
     70        inline bool operator!=(const Field& other) const;
     71    };
     72 
     73    FormattedStringBuilder &operator=(const FormattedStringBuilder &other);
     74 
     75    int32_t length() const;
     76 
     77    int32_t codePointCount() const;
     78 
     79    inline char16_t charAt(int32_t index) const {
     80        U_ASSERT(index >= 0);
     81        U_ASSERT(index < fLength);
     82        return getCharPtr()[fZero + index];
     83    }
     84 
     85    inline Field fieldAt(int32_t index) const {
     86        U_ASSERT(index >= 0);
     87        U_ASSERT(index < fLength);
     88        return getFieldPtr()[fZero + index];
     89    }
     90 
     91    UChar32 getFirstCodePoint() const;
     92 
     93    UChar32 getLastCodePoint() const;
     94 
     95    UChar32 codePointAt(int32_t index) const;
     96 
     97    UChar32 codePointBefore(int32_t index) const;
     98 
     99    FormattedStringBuilder &clear();
    100 
    101    /** Appends a UTF-16 code unit. */
    102    inline int32_t appendChar16(char16_t codeUnit, Field field, UErrorCode& status) {
    103        // appendCodePoint handles both code units and code points.
    104        return insertCodePoint(fLength, codeUnit, field, status);
    105    }
    106 
    107    /** Inserts a UTF-16 code unit. Note: insert at index 0 is very efficient. */
    108    inline int32_t insertChar16(int32_t index, char16_t codeUnit, Field field, UErrorCode& status) {
    109        // insertCodePoint handles both code units and code points.
    110        return insertCodePoint(index, codeUnit, field, status);
    111    }
    112 
    113    /** Appends a Unicode code point. */
    114    inline int32_t appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status) {
    115        return insertCodePoint(fLength, codePoint, field, status);
    116    }
    117 
    118    /** Inserts a Unicode code point. Note: insert at index 0 is very efficient. */
    119    int32_t insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status);
    120 
    121    /** Appends a string. */
    122    inline int32_t append(const UnicodeString &unistr, Field field, UErrorCode &status) {
    123        return insert(fLength, unistr, field, status);
    124    }
    125 
    126    /** Inserts a string. Note: insert at index 0 is very efficient. */
    127    int32_t insert(int32_t index, const UnicodeString &unistr, Field field, UErrorCode &status);
    128 
    129    /** Inserts a substring. Note: insert at index 0 is very efficient.
    130     *
    131     * @param start Start index of the substring of unistr to be inserted.
    132     * @param end End index of the substring of unistr to be inserted (exclusive).
    133     */
    134    int32_t insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end, Field field,
    135                   UErrorCode &status);
    136 
    137    /** Deletes a substring and then inserts a string at that same position.
    138     * Similar to JavaScript Array.prototype.splice().
    139     *
    140     * @param startThis Start of the span to delete.
    141     * @param endThis End of the span to delete (exclusive).
    142     * @param unistr The string to insert at the deletion position.
    143     * @param startOther Start index of the substring of unistr to be inserted.
    144     * @param endOther End index of the substring of unistr to be inserted (exclusive).
    145     */
    146    int32_t splice(int32_t startThis, int32_t endThis,  const UnicodeString &unistr,
    147                   int32_t startOther, int32_t endOther, Field field, UErrorCode& status);
    148 
    149    /** Appends a formatted string. */
    150    int32_t append(const FormattedStringBuilder &other, UErrorCode &status);
    151 
    152    /** Inserts a formatted string. Note: insert at index 0 is very efficient. */
    153    int32_t insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status);
    154 
    155    /**
    156     * Ensures that the string buffer contains a NUL terminator. The NUL terminator does
    157     * not count toward the string length. Any further changes to the string (insert or
    158     * append) may invalidate the NUL terminator.
    159     *
    160     * You should call this method after the formatted string is completely built if you
    161     * plan to return a pointer to the string from a C API.
    162     */
    163    void writeTerminator(UErrorCode& status);
    164 
    165    /**
    166     * Gets a "safe" UnicodeString that can be used even after the FormattedStringBuilder is destructed.
    167     */
    168    UnicodeString toUnicodeString() const;
    169 
    170    /**
    171     * Gets an "unsafe" UnicodeString that is valid only as long as the FormattedStringBuilder is alive and
    172     * unchanged. Slightly faster than toUnicodeString().
    173     */
    174    UnicodeString toTempUnicodeString() const;
    175 
    176    UnicodeString toDebugString() const;
    177 
    178    const char16_t *chars() const;
    179 
    180    bool contentEquals(const FormattedStringBuilder &other) const;
    181 
    182    bool containsField(Field field) const;
    183 
    184  private:
    185    bool fUsingHeap = false;
    186    ValueOrHeapArray<char16_t> fChars;
    187    ValueOrHeapArray<Field> fFields;
    188    int32_t fZero = DEFAULT_CAPACITY / 2;
    189    int32_t fLength = 0;
    190 
    191    inline char16_t *getCharPtr() {
    192        return fUsingHeap ? fChars.heap.ptr : fChars.value;
    193    }
    194 
    195    inline const char16_t *getCharPtr() const {
    196        return fUsingHeap ? fChars.heap.ptr : fChars.value;
    197    }
    198 
    199    inline Field *getFieldPtr() {
    200        return fUsingHeap ? fFields.heap.ptr : fFields.value;
    201    }
    202 
    203    inline const Field *getFieldPtr() const {
    204        return fUsingHeap ? fFields.heap.ptr : fFields.value;
    205    }
    206 
    207    inline int32_t getCapacity() const {
    208        return fUsingHeap ? fChars.heap.capacity : DEFAULT_CAPACITY;
    209    }
    210 
    211    int32_t prepareForInsert(int32_t index, int32_t count, UErrorCode &status);
    212 
    213    int32_t prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status);
    214 
    215    int32_t remove(int32_t index, int32_t count);
    216 
    217    friend class FormattedValueStringBuilderImpl;
    218 };
    219 
    220 static_assert(
    221    // std::is_pod<> is deprecated.
    222    std::is_standard_layout_v<FormattedStringBuilder::Field> &&
    223        std::is_trivial_v<FormattedStringBuilder::Field>,
    224    "Field should be a POD type for efficient initialization");
    225 
    226 constexpr FormattedStringBuilder::Field::Field(uint8_t category, uint8_t field)
    227    : bits((
    228        U_ASSERT(category <= 0xf),
    229        U_ASSERT(field <= 0xf),
    230        static_cast<uint8_t>((category << 4) | field)
    231    )) {}
    232 
    233 /**
    234 * Internal constant for the undefined field for use in FormattedStringBuilder.
    235 */
    236 constexpr FormattedStringBuilder::Field kUndefinedField = {UFIELD_CATEGORY_UNDEFINED, 0};
    237 
    238 /**
    239 * Internal field to signal "numeric" when fields are not supported in NumberFormat.
    240 */
    241 constexpr FormattedStringBuilder::Field kGeneralNumericField = {UFIELD_CATEGORY_UNDEFINED, 1};
    242 
    243 inline UFieldCategory FormattedStringBuilder::Field::getCategory() const {
    244    return static_cast<UFieldCategory>(bits >> 4);
    245 }
    246 
    247 inline int32_t FormattedStringBuilder::Field::getField() const {
    248    return bits & 0xf;
    249 }
    250 
    251 inline bool FormattedStringBuilder::Field::isNumeric() const {
    252    return getCategory() == UFIELD_CATEGORY_NUMBER || *this == kGeneralNumericField;
    253 }
    254 
    255 inline bool FormattedStringBuilder::Field::isUndefined() const {
    256    return getCategory() == UFIELD_CATEGORY_UNDEFINED;
    257 }
    258 
    259 inline bool FormattedStringBuilder::Field::operator==(const Field& other) const {
    260    return bits == other.bits;
    261 }
    262 
    263 inline bool FormattedStringBuilder::Field::operator!=(const Field& other) const {
    264    return bits != other.bits;
    265 }
    266 
    267 U_NAMESPACE_END
    268 
    269 
    270 #endif //__NUMBER_STRINGBUILDER_H__
    271 
    272 #endif /* #if !UCONFIG_NO_FORMATTING */