formatted_string_builder.cpp (15395B)
1 // © 2017 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_FORMATTING 7 8 #include "formatted_string_builder.h" 9 #include "putilimp.h" 10 #include "unicode/ustring.h" 11 #include "unicode/utf16.h" 12 #include "unicode/unum.h" // for UNumberFormatFields literals 13 14 namespace { 15 16 // A version of uprv_memcpy that checks for length 0. 17 // By default, uprv_memcpy requires a length of at least 1. 18 inline void uprv_memcpy2(void* dest, const void* src, size_t len) { 19 if (len > 0) { 20 uprv_memcpy(dest, src, len); 21 } 22 } 23 24 // A version of uprv_memmove that checks for length 0. 25 // By default, uprv_memmove requires a length of at least 1. 26 inline void uprv_memmove2(void* dest, const void* src, size_t len) { 27 if (len > 0) { 28 uprv_memmove(dest, src, len); 29 } 30 } 31 32 } // namespace 33 34 35 U_NAMESPACE_BEGIN 36 37 FormattedStringBuilder::FormattedStringBuilder() { 38 #if U_DEBUG 39 // Initializing the memory to non-zero helps catch some bugs that involve 40 // reading from an improperly terminated string. 41 for (int32_t i=0; i<getCapacity(); i++) { 42 getCharPtr()[i] = 1; 43 } 44 #endif 45 } 46 47 FormattedStringBuilder::~FormattedStringBuilder() { 48 if (fUsingHeap) { 49 uprv_free(fChars.heap.ptr); 50 uprv_free(fFields.heap.ptr); 51 } 52 } 53 54 FormattedStringBuilder::FormattedStringBuilder(const FormattedStringBuilder &other) { 55 *this = other; 56 } 57 58 FormattedStringBuilder &FormattedStringBuilder::operator=(const FormattedStringBuilder &other) { 59 // Check for self-assignment 60 if (this == &other) { 61 return *this; 62 } 63 64 // Continue with deallocation and copying 65 if (fUsingHeap) { 66 uprv_free(fChars.heap.ptr); 67 uprv_free(fFields.heap.ptr); 68 fUsingHeap = false; 69 } 70 71 int32_t capacity = other.getCapacity(); 72 if (capacity > DEFAULT_CAPACITY) { 73 // FIXME: uprv_malloc 74 // C++ note: malloc appears in two places: here and in prepareForInsertHelper. 75 auto* newChars = static_cast<char16_t*>(uprv_malloc(sizeof(char16_t) * capacity)); 76 auto* newFields = static_cast<Field*>(uprv_malloc(sizeof(Field) * capacity)); 77 if (newChars == nullptr || newFields == nullptr) { 78 // UErrorCode is not available; fail silently. 79 uprv_free(newChars); 80 uprv_free(newFields); 81 *this = FormattedStringBuilder(); // can't fail 82 return *this; 83 } 84 85 fUsingHeap = true; 86 fChars.heap.capacity = capacity; 87 fChars.heap.ptr = newChars; 88 fFields.heap.capacity = capacity; 89 fFields.heap.ptr = newFields; 90 } 91 92 uprv_memcpy2(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * capacity); 93 uprv_memcpy2(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * capacity); 94 95 fZero = other.fZero; 96 fLength = other.fLength; 97 return *this; 98 } 99 100 int32_t FormattedStringBuilder::length() const { 101 return fLength; 102 } 103 104 int32_t FormattedStringBuilder::codePointCount() const { 105 return u_countChar32(getCharPtr() + fZero, fLength); 106 } 107 108 UChar32 FormattedStringBuilder::getFirstCodePoint() const { 109 if (fLength == 0) { 110 return -1; 111 } 112 UChar32 cp; 113 U16_GET(getCharPtr() + fZero, 0, 0, fLength, cp); 114 return cp; 115 } 116 117 UChar32 FormattedStringBuilder::getLastCodePoint() const { 118 if (fLength == 0) { 119 return -1; 120 } 121 int32_t offset = fLength; 122 U16_BACK_1(getCharPtr() + fZero, 0, offset); 123 UChar32 cp; 124 U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp); 125 return cp; 126 } 127 128 UChar32 FormattedStringBuilder::codePointAt(int32_t index) const { 129 UChar32 cp; 130 U16_GET(getCharPtr() + fZero, 0, index, fLength, cp); 131 return cp; 132 } 133 134 UChar32 FormattedStringBuilder::codePointBefore(int32_t index) const { 135 int32_t offset = index; 136 U16_BACK_1(getCharPtr() + fZero, 0, offset); 137 UChar32 cp; 138 U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp); 139 return cp; 140 } 141 142 FormattedStringBuilder &FormattedStringBuilder::clear() { 143 // TODO: Reset the heap here? 144 fZero = getCapacity() / 2; 145 fLength = 0; 146 return *this; 147 } 148 149 int32_t 150 FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) { 151 int32_t count = U16_LENGTH(codePoint); 152 int32_t position = prepareForInsert(index, count, status); 153 if (U_FAILURE(status)) { 154 return count; 155 } 156 auto* charPtr = getCharPtr(); 157 auto* fieldPtr = getFieldPtr(); 158 if (count == 1) { 159 charPtr[position] = static_cast<char16_t>(codePoint); 160 fieldPtr[position] = field; 161 } else { 162 charPtr[position] = U16_LEAD(codePoint); 163 charPtr[position + 1] = U16_TRAIL(codePoint); 164 fieldPtr[position] = fieldPtr[position + 1] = field; 165 } 166 return count; 167 } 168 169 int32_t FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field, 170 UErrorCode &status) { 171 if (unistr.length() == 0) { 172 // Nothing to insert. 173 return 0; 174 } else if (unistr.length() == 1) { 175 // Fast path: insert using insertCodePoint. 176 return insertCodePoint(index, unistr.charAt(0), field, status); 177 } else { 178 return insert(index, unistr, 0, unistr.length(), field, status); 179 } 180 } 181 182 int32_t 183 FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end, 184 Field field, UErrorCode &status) { 185 int32_t count = end - start; 186 int32_t position = prepareForInsert(index, count, status); 187 if (U_FAILURE(status)) { 188 return count; 189 } 190 for (int32_t i = 0; i < count; i++) { 191 getCharPtr()[position + i] = unistr.charAt(start + i); 192 getFieldPtr()[position + i] = field; 193 } 194 return count; 195 } 196 197 int32_t 198 FormattedStringBuilder::splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr, 199 int32_t startOther, int32_t endOther, Field field, UErrorCode& status) { 200 int32_t thisLength = endThis - startThis; 201 int32_t otherLength = endOther - startOther; 202 int32_t count = otherLength - thisLength; 203 if (U_FAILURE(status)) { 204 return count; 205 } 206 int32_t position; 207 if (count > 0) { 208 // Overall, chars need to be added. 209 position = prepareForInsert(startThis, count, status); 210 } else { 211 // Overall, chars need to be removed or kept the same. 212 position = remove(startThis, -count); 213 } 214 if (U_FAILURE(status)) { 215 return count; 216 } 217 for (int32_t i = 0; i < otherLength; i++) { 218 getCharPtr()[position + i] = unistr.charAt(startOther + i); 219 getFieldPtr()[position + i] = field; 220 } 221 return count; 222 } 223 224 int32_t FormattedStringBuilder::append(const FormattedStringBuilder &other, UErrorCode &status) { 225 return insert(fLength, other, status); 226 } 227 228 int32_t 229 FormattedStringBuilder::insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status) { 230 if (U_FAILURE(status)) { 231 return 0; 232 } 233 if (this == &other) { 234 status = U_ILLEGAL_ARGUMENT_ERROR; 235 return 0; 236 } 237 int32_t count = other.fLength; 238 if (count == 0) { 239 // Nothing to insert. 240 return 0; 241 } 242 int32_t position = prepareForInsert(index, count, status); 243 if (U_FAILURE(status)) { 244 return count; 245 } 246 for (int32_t i = 0; i < count; i++) { 247 getCharPtr()[position + i] = other.charAt(i); 248 getFieldPtr()[position + i] = other.fieldAt(i); 249 } 250 return count; 251 } 252 253 void FormattedStringBuilder::writeTerminator(UErrorCode& status) { 254 int32_t position = prepareForInsert(fLength, 1, status); 255 if (U_FAILURE(status)) { 256 return; 257 } 258 getCharPtr()[position] = 0; 259 getFieldPtr()[position] = kUndefinedField; 260 fLength--; 261 } 262 263 int32_t FormattedStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) { 264 U_ASSERT(index >= 0); 265 U_ASSERT(index <= fLength); 266 U_ASSERT(count >= 0); 267 U_ASSERT(fZero >= 0); 268 U_ASSERT(fLength >= 0); 269 U_ASSERT(getCapacity() - fZero >= fLength); 270 if (U_FAILURE(status)) { 271 return count; 272 } 273 if (index == 0 && fZero - count >= 0) { 274 // Append to start 275 fZero -= count; 276 fLength += count; 277 return fZero; 278 } else if (index == fLength && count <= getCapacity() - fZero - fLength) { 279 // Append to end 280 fLength += count; 281 return fZero + fLength - count; 282 } else { 283 // Move chars around and/or allocate more space 284 return prepareForInsertHelper(index, count, status); 285 } 286 } 287 288 int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) { 289 int32_t oldCapacity = getCapacity(); 290 int32_t oldZero = fZero; 291 char16_t *oldChars = getCharPtr(); 292 Field *oldFields = getFieldPtr(); 293 int32_t newLength; 294 if (uprv_add32_overflow(fLength, count, &newLength)) { 295 status = U_INPUT_TOO_LONG_ERROR; 296 return -1; 297 } 298 int32_t newZero; 299 if (newLength > oldCapacity) { 300 if (newLength > INT32_MAX / 2) { 301 // We do not support more than 1G char16_t in this code because 302 // dealing with >2G *bytes* can cause subtle bugs. 303 status = U_INPUT_TOO_LONG_ERROR; 304 return -1; 305 } 306 // Keep newCapacity also to at most 1G char16_t. 307 int32_t newCapacity = newLength * 2; 308 newZero = (newCapacity - newLength) / 2; 309 310 // C++ note: malloc appears in two places: here and in the assignment operator. 311 auto* newChars = 312 static_cast<char16_t*>(uprv_malloc(sizeof(char16_t) * static_cast<size_t>(newCapacity))); 313 auto* newFields = 314 static_cast<Field*>(uprv_malloc(sizeof(Field) * static_cast<size_t>(newCapacity))); 315 if (newChars == nullptr || newFields == nullptr) { 316 uprv_free(newChars); 317 uprv_free(newFields); 318 status = U_MEMORY_ALLOCATION_ERROR; 319 return -1; 320 } 321 322 // First copy the prefix and then the suffix, leaving room for the new chars that the 323 // caller wants to insert. 324 // C++ note: memcpy is OK because the src and dest do not overlap. 325 uprv_memcpy2(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index); 326 uprv_memcpy2(newChars + newZero + index + count, 327 oldChars + oldZero + index, 328 sizeof(char16_t) * (fLength - index)); 329 uprv_memcpy2(newFields + newZero, oldFields + oldZero, sizeof(Field) * index); 330 uprv_memcpy2(newFields + newZero + index + count, 331 oldFields + oldZero + index, 332 sizeof(Field) * (fLength - index)); 333 334 if (fUsingHeap) { 335 uprv_free(oldChars); 336 uprv_free(oldFields); 337 } 338 fUsingHeap = true; 339 fChars.heap.ptr = newChars; 340 fChars.heap.capacity = newCapacity; 341 fFields.heap.ptr = newFields; 342 fFields.heap.capacity = newCapacity; 343 } else { 344 newZero = (oldCapacity - newLength) / 2; 345 346 // C++ note: memmove is required because src and dest may overlap. 347 // First copy the entire string to the location of the prefix, and then move the suffix 348 // to make room for the new chars that the caller wants to insert. 349 uprv_memmove2(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength); 350 uprv_memmove2(oldChars + newZero + index + count, 351 oldChars + newZero + index, 352 sizeof(char16_t) * (fLength - index)); 353 uprv_memmove2(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength); 354 uprv_memmove2(oldFields + newZero + index + count, 355 oldFields + newZero + index, 356 sizeof(Field) * (fLength - index)); 357 } 358 fZero = newZero; 359 fLength = newLength; 360 return fZero + index; 361 } 362 363 int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) { 364 U_ASSERT(0 <= index); 365 U_ASSERT(index <= fLength); 366 U_ASSERT(count <= (fLength - index)); 367 U_ASSERT(index <= getCapacity() - fZero); 368 369 int32_t position = index + fZero; 370 // TODO: Reset the heap here? (If the string after removal can fit on stack?) 371 uprv_memmove2(getCharPtr() + position, 372 getCharPtr() + position + count, 373 sizeof(char16_t) * (fLength - index - count)); 374 uprv_memmove2(getFieldPtr() + position, 375 getFieldPtr() + position + count, 376 sizeof(Field) * (fLength - index - count)); 377 fLength -= count; 378 return position; 379 } 380 381 UnicodeString FormattedStringBuilder::toUnicodeString() const { 382 return UnicodeString(getCharPtr() + fZero, fLength); 383 } 384 385 UnicodeString FormattedStringBuilder::toTempUnicodeString() const { 386 // Readonly-alias constructor: 387 return UnicodeString(false, getCharPtr() + fZero, fLength); 388 } 389 390 UnicodeString FormattedStringBuilder::toDebugString() const { 391 UnicodeString sb; 392 sb.append(u"<FormattedStringBuilder [", -1); 393 sb.append(toUnicodeString()); 394 sb.append(u"] [", -1); 395 for (int i = 0; i < fLength; i++) { 396 if (fieldAt(i) == kUndefinedField) { 397 sb.append(u'n'); 398 } else if (fieldAt(i).getCategory() == UFIELD_CATEGORY_NUMBER) { 399 char16_t c; 400 switch (fieldAt(i).getField()) { 401 case UNUM_SIGN_FIELD: 402 c = u'-'; 403 break; 404 case UNUM_INTEGER_FIELD: 405 c = u'i'; 406 break; 407 case UNUM_FRACTION_FIELD: 408 c = u'f'; 409 break; 410 case UNUM_EXPONENT_FIELD: 411 c = u'e'; 412 break; 413 case UNUM_EXPONENT_SIGN_FIELD: 414 c = u'+'; 415 break; 416 case UNUM_EXPONENT_SYMBOL_FIELD: 417 c = u'E'; 418 break; 419 case UNUM_DECIMAL_SEPARATOR_FIELD: 420 c = u'.'; 421 break; 422 case UNUM_GROUPING_SEPARATOR_FIELD: 423 c = u','; 424 break; 425 case UNUM_PERCENT_FIELD: 426 c = u'%'; 427 break; 428 case UNUM_PERMILL_FIELD: 429 c = u'‰'; 430 break; 431 case UNUM_CURRENCY_FIELD: 432 c = u'$'; 433 break; 434 default: 435 c = u'0' + fieldAt(i).getField(); 436 break; 437 } 438 sb.append(c); 439 } else { 440 sb.append(u'0' + fieldAt(i).getCategory()); 441 } 442 } 443 sb.append(u"]>", -1); 444 return sb; 445 } 446 447 const char16_t *FormattedStringBuilder::chars() const { 448 return getCharPtr() + fZero; 449 } 450 451 bool FormattedStringBuilder::contentEquals(const FormattedStringBuilder &other) const { 452 if (fLength != other.fLength) { 453 return false; 454 } 455 for (int32_t i = 0; i < fLength; i++) { 456 if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) { 457 return false; 458 } 459 } 460 return true; 461 } 462 463 bool FormattedStringBuilder::containsField(Field field) const { 464 for (int32_t i = 0; i < fLength; i++) { 465 if (field == fieldAt(i)) { 466 return true; 467 } 468 } 469 return false; 470 } 471 472 U_NAMESPACE_END 473 474 #endif /* #if !UCONFIG_NO_FORMATTING */