formatted_string_builder.h (8947B)
1 // © 2017 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_FORMATTING 7 #ifndef __NUMBER_STRINGBUILDER_H__ 8 #define __NUMBER_STRINGBUILDER_H__ 9 10 11 #include <cstdint> 12 #include <type_traits> 13 14 #include "cstring.h" 15 #include "uassert.h" 16 #include "fphdlimp.h" 17 18 U_NAMESPACE_BEGIN 19 20 class FormattedValueStringBuilderImpl; 21 22 /** 23 * A StringBuilder optimized for formatting. It implements the following key 24 * features beyond a UnicodeString: 25 * 26 * <ol> 27 * <li>Efficient prepend as well as append. 28 * <li>Keeps track of Fields in an efficient manner. 29 * </ol> 30 * 31 * See also FormattedValueStringBuilderImpl. 32 * 33 * @author sffc (Shane Carr) 34 */ 35 class U_I18N_API FormattedStringBuilder : public UMemory { 36 private: 37 static const int32_t DEFAULT_CAPACITY = 40; 38 39 template<typename T> 40 union ValueOrHeapArray { 41 T value[DEFAULT_CAPACITY]; 42 struct { 43 T *ptr; 44 int32_t capacity; 45 } heap; 46 }; 47 48 public: 49 FormattedStringBuilder(); 50 51 ~FormattedStringBuilder(); 52 53 FormattedStringBuilder(const FormattedStringBuilder &other); 54 55 // Convention: bottom 4 bits for field, top 4 bits for field category. 56 // Field category 0 implies the number category so that the number field 57 // literals can be directly passed as a Field type. 58 // Exported as U_I18N_API so it can be used by other exports on Windows. 59 struct U_I18N_API Field { 60 uint8_t bits; 61 62 Field() = default; 63 constexpr Field(uint8_t category, uint8_t field); 64 65 inline UFieldCategory getCategory() const; 66 inline int32_t getField() const; 67 inline bool isNumeric() const; 68 inline bool isUndefined() const; 69 inline bool operator==(const Field& other) const; 70 inline bool operator!=(const Field& other) const; 71 }; 72 73 FormattedStringBuilder &operator=(const FormattedStringBuilder &other); 74 75 int32_t length() const; 76 77 int32_t codePointCount() const; 78 79 inline char16_t charAt(int32_t index) const { 80 U_ASSERT(index >= 0); 81 U_ASSERT(index < fLength); 82 return getCharPtr()[fZero + index]; 83 } 84 85 inline Field fieldAt(int32_t index) const { 86 U_ASSERT(index >= 0); 87 U_ASSERT(index < fLength); 88 return getFieldPtr()[fZero + index]; 89 } 90 91 UChar32 getFirstCodePoint() const; 92 93 UChar32 getLastCodePoint() const; 94 95 UChar32 codePointAt(int32_t index) const; 96 97 UChar32 codePointBefore(int32_t index) const; 98 99 FormattedStringBuilder &clear(); 100 101 /** Appends a UTF-16 code unit. */ 102 inline int32_t appendChar16(char16_t codeUnit, Field field, UErrorCode& status) { 103 // appendCodePoint handles both code units and code points. 104 return insertCodePoint(fLength, codeUnit, field, status); 105 } 106 107 /** Inserts a UTF-16 code unit. Note: insert at index 0 is very efficient. */ 108 inline int32_t insertChar16(int32_t index, char16_t codeUnit, Field field, UErrorCode& status) { 109 // insertCodePoint handles both code units and code points. 110 return insertCodePoint(index, codeUnit, field, status); 111 } 112 113 /** Appends a Unicode code point. */ 114 inline int32_t appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status) { 115 return insertCodePoint(fLength, codePoint, field, status); 116 } 117 118 /** Inserts a Unicode code point. Note: insert at index 0 is very efficient. */ 119 int32_t insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status); 120 121 /** Appends a string. */ 122 inline int32_t append(const UnicodeString &unistr, Field field, UErrorCode &status) { 123 return insert(fLength, unistr, field, status); 124 } 125 126 /** Inserts a string. Note: insert at index 0 is very efficient. */ 127 int32_t insert(int32_t index, const UnicodeString &unistr, Field field, UErrorCode &status); 128 129 /** Inserts a substring. Note: insert at index 0 is very efficient. 130 * 131 * @param start Start index of the substring of unistr to be inserted. 132 * @param end End index of the substring of unistr to be inserted (exclusive). 133 */ 134 int32_t insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end, Field field, 135 UErrorCode &status); 136 137 /** Deletes a substring and then inserts a string at that same position. 138 * Similar to JavaScript Array.prototype.splice(). 139 * 140 * @param startThis Start of the span to delete. 141 * @param endThis End of the span to delete (exclusive). 142 * @param unistr The string to insert at the deletion position. 143 * @param startOther Start index of the substring of unistr to be inserted. 144 * @param endOther End index of the substring of unistr to be inserted (exclusive). 145 */ 146 int32_t splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr, 147 int32_t startOther, int32_t endOther, Field field, UErrorCode& status); 148 149 /** Appends a formatted string. */ 150 int32_t append(const FormattedStringBuilder &other, UErrorCode &status); 151 152 /** Inserts a formatted string. Note: insert at index 0 is very efficient. */ 153 int32_t insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status); 154 155 /** 156 * Ensures that the string buffer contains a NUL terminator. The NUL terminator does 157 * not count toward the string length. Any further changes to the string (insert or 158 * append) may invalidate the NUL terminator. 159 * 160 * You should call this method after the formatted string is completely built if you 161 * plan to return a pointer to the string from a C API. 162 */ 163 void writeTerminator(UErrorCode& status); 164 165 /** 166 * Gets a "safe" UnicodeString that can be used even after the FormattedStringBuilder is destructed. 167 */ 168 UnicodeString toUnicodeString() const; 169 170 /** 171 * Gets an "unsafe" UnicodeString that is valid only as long as the FormattedStringBuilder is alive and 172 * unchanged. Slightly faster than toUnicodeString(). 173 */ 174 UnicodeString toTempUnicodeString() const; 175 176 UnicodeString toDebugString() const; 177 178 const char16_t *chars() const; 179 180 bool contentEquals(const FormattedStringBuilder &other) const; 181 182 bool containsField(Field field) const; 183 184 private: 185 bool fUsingHeap = false; 186 ValueOrHeapArray<char16_t> fChars; 187 ValueOrHeapArray<Field> fFields; 188 int32_t fZero = DEFAULT_CAPACITY / 2; 189 int32_t fLength = 0; 190 191 inline char16_t *getCharPtr() { 192 return fUsingHeap ? fChars.heap.ptr : fChars.value; 193 } 194 195 inline const char16_t *getCharPtr() const { 196 return fUsingHeap ? fChars.heap.ptr : fChars.value; 197 } 198 199 inline Field *getFieldPtr() { 200 return fUsingHeap ? fFields.heap.ptr : fFields.value; 201 } 202 203 inline const Field *getFieldPtr() const { 204 return fUsingHeap ? fFields.heap.ptr : fFields.value; 205 } 206 207 inline int32_t getCapacity() const { 208 return fUsingHeap ? fChars.heap.capacity : DEFAULT_CAPACITY; 209 } 210 211 int32_t prepareForInsert(int32_t index, int32_t count, UErrorCode &status); 212 213 int32_t prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status); 214 215 int32_t remove(int32_t index, int32_t count); 216 217 friend class FormattedValueStringBuilderImpl; 218 }; 219 220 static_assert( 221 // std::is_pod<> is deprecated. 222 std::is_standard_layout_v<FormattedStringBuilder::Field> && 223 std::is_trivial_v<FormattedStringBuilder::Field>, 224 "Field should be a POD type for efficient initialization"); 225 226 constexpr FormattedStringBuilder::Field::Field(uint8_t category, uint8_t field) 227 : bits(( 228 U_ASSERT(category <= 0xf), 229 U_ASSERT(field <= 0xf), 230 static_cast<uint8_t>((category << 4) | field) 231 )) {} 232 233 /** 234 * Internal constant for the undefined field for use in FormattedStringBuilder. 235 */ 236 constexpr FormattedStringBuilder::Field kUndefinedField = {UFIELD_CATEGORY_UNDEFINED, 0}; 237 238 /** 239 * Internal field to signal "numeric" when fields are not supported in NumberFormat. 240 */ 241 constexpr FormattedStringBuilder::Field kGeneralNumericField = {UFIELD_CATEGORY_UNDEFINED, 1}; 242 243 inline UFieldCategory FormattedStringBuilder::Field::getCategory() const { 244 return static_cast<UFieldCategory>(bits >> 4); 245 } 246 247 inline int32_t FormattedStringBuilder::Field::getField() const { 248 return bits & 0xf; 249 } 250 251 inline bool FormattedStringBuilder::Field::isNumeric() const { 252 return getCategory() == UFIELD_CATEGORY_NUMBER || *this == kGeneralNumericField; 253 } 254 255 inline bool FormattedStringBuilder::Field::isUndefined() const { 256 return getCategory() == UFIELD_CATEGORY_UNDEFINED; 257 } 258 259 inline bool FormattedStringBuilder::Field::operator==(const Field& other) const { 260 return bits == other.bits; 261 } 262 263 inline bool FormattedStringBuilder::Field::operator!=(const Field& other) const { 264 return bits != other.bits; 265 } 266 267 U_NAMESPACE_END 268 269 270 #endif //__NUMBER_STRINGBUILDER_H__ 271 272 #endif /* #if !UCONFIG_NO_FORMATTING */