number_modifiers.cpp (19469B)
1 // © 2017 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_FORMATTING 7 8 #include "umutex.h" 9 #include "ucln_cmn.h" 10 #include "ucln_in.h" 11 #include "number_modifiers.h" 12 13 using namespace icu; 14 using namespace icu::number; 15 using namespace icu::number::impl; 16 17 namespace { 18 19 // TODO: This is copied from simpleformatter.cpp 20 const int32_t ARG_NUM_LIMIT = 0x100; 21 22 // These are the default currency spacing UnicodeSets in CLDR. 23 // Pre-compute them for performance. 24 // The Java unit test testCurrencySpacingPatternStability() will start failing if these change in CLDR. 25 icu::UInitOnce gDefaultCurrencySpacingInitOnce {}; 26 27 UnicodeSet *UNISET_DIGIT = nullptr; 28 UnicodeSet *UNISET_NOTSZ = nullptr; 29 30 UBool U_CALLCONV cleanupDefaultCurrencySpacing() { 31 delete UNISET_DIGIT; 32 UNISET_DIGIT = nullptr; 33 delete UNISET_NOTSZ; 34 UNISET_NOTSZ = nullptr; 35 gDefaultCurrencySpacingInitOnce.reset(); 36 return true; 37 } 38 39 void U_CALLCONV initDefaultCurrencySpacing(UErrorCode &status) { 40 ucln_i18n_registerCleanup(UCLN_I18N_CURRENCY_SPACING, cleanupDefaultCurrencySpacing); 41 UNISET_DIGIT = new UnicodeSet(UnicodeString(u"[:digit:]"), status); 42 UNISET_NOTSZ = new UnicodeSet(UnicodeString(u"[[:^S:]&[:^Z:]]"), status); 43 if (UNISET_DIGIT == nullptr || UNISET_NOTSZ == nullptr) { 44 status = U_MEMORY_ALLOCATION_ERROR; 45 return; 46 } 47 UNISET_DIGIT->freeze(); 48 UNISET_NOTSZ->freeze(); 49 } 50 51 } // namespace 52 53 54 Modifier::~Modifier() = default; 55 56 Modifier::Parameters::Parameters() 57 : obj(nullptr) {} 58 59 Modifier::Parameters::Parameters( 60 const ModifierStore* _obj, Signum _signum, StandardPlural::Form _plural) 61 : obj(_obj), signum(_signum), plural(_plural) {} 62 63 bool Modifier::semanticallyEquivalent(const Modifier& other) const { 64 Parameters paramsThis; 65 Parameters paramsOther; 66 getParameters(paramsThis); 67 other.getParameters(paramsOther); 68 if (paramsThis.obj == nullptr && paramsOther.obj == nullptr) { 69 return strictEquals(other); 70 } else if (paramsThis.obj == nullptr || paramsOther.obj == nullptr) { 71 return false; 72 } 73 for (size_t i=0; i<SIGNUM_COUNT; i++) { 74 auto signum = static_cast<Signum>(i); 75 for (size_t j=0; j<StandardPlural::COUNT; j++) { 76 auto plural = static_cast<StandardPlural::Form>(j); 77 const auto* mod1 = paramsThis.obj->getModifier(signum, plural); 78 const auto* mod2 = paramsOther.obj->getModifier(signum, plural); 79 if (mod1 == mod2) { 80 // Equal pointers 81 continue; 82 } else if (mod1 == nullptr || mod2 == nullptr) { 83 // One pointer is null but not the other 84 return false; 85 } else if (!mod1->strictEquals(*mod2)) { 86 // The modifiers are NOT equivalent 87 return false; 88 } else { 89 // The modifiers are equivalent 90 continue; 91 } 92 } 93 } 94 return true; 95 } 96 97 98 ModifierStore::~ModifierStore() = default; 99 100 AdoptingSignumModifierStore::~AdoptingSignumModifierStore() { 101 for (const Modifier *mod : mods) { 102 delete mod; 103 } 104 } 105 106 AdoptingSignumModifierStore& 107 AdoptingSignumModifierStore::operator=(AdoptingSignumModifierStore&& other) noexcept { 108 for (size_t i=0; i<SIGNUM_COUNT; i++) { 109 this->mods[i] = other.mods[i]; 110 other.mods[i] = nullptr; 111 } 112 return *this; 113 } 114 115 116 int32_t ConstantAffixModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex, 117 UErrorCode &status) const { 118 // Insert the suffix first since inserting the prefix will change the rightIndex 119 int length = output.insert(rightIndex, fSuffix, fField, status); 120 length += output.insert(leftIndex, fPrefix, fField, status); 121 return length; 122 } 123 124 int32_t ConstantAffixModifier::getPrefixLength() const { 125 return fPrefix.length(); 126 } 127 128 int32_t ConstantAffixModifier::getCodePointCount() const { 129 return fPrefix.countChar32() + fSuffix.countChar32(); 130 } 131 132 bool ConstantAffixModifier::isStrong() const { 133 return fStrong; 134 } 135 136 bool ConstantAffixModifier::containsField(Field field) const { 137 (void)field; 138 // This method is not currently used. 139 UPRV_UNREACHABLE_EXIT; 140 } 141 142 void ConstantAffixModifier::getParameters(Parameters& output) const { 143 (void)output; 144 // This method is not currently used. 145 UPRV_UNREACHABLE_EXIT; 146 } 147 148 bool ConstantAffixModifier::strictEquals(const Modifier& other) const { 149 const auto* _other = dynamic_cast<const ConstantAffixModifier*>(&other); 150 if (_other == nullptr) { 151 return false; 152 } 153 return fPrefix == _other->fPrefix 154 && fSuffix == _other->fSuffix 155 && fField == _other->fField 156 && fStrong == _other->fStrong; 157 } 158 159 160 SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong) 161 : SimpleModifier(simpleFormatter, field, strong, {}) {} 162 163 SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong, 164 const Modifier::Parameters parameters) 165 : fCompiledPattern(simpleFormatter.compiledPattern), fField(field), fStrong(strong), 166 fParameters(parameters) { 167 int32_t argLimit = SimpleFormatter::getArgumentLimit( 168 fCompiledPattern.getBuffer(), fCompiledPattern.length()); 169 if (argLimit == 0) { 170 // No arguments in compiled pattern 171 fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT; 172 U_ASSERT(2 + fPrefixLength == fCompiledPattern.length()); 173 // Set suffixOffset = -1 to indicate no arguments in compiled pattern. 174 fSuffixOffset = -1; 175 fSuffixLength = 0; 176 } else { 177 U_ASSERT(argLimit == 1); 178 if (fCompiledPattern.charAt(1) != 0) { 179 // Found prefix 180 fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT; 181 fSuffixOffset = 3 + fPrefixLength; 182 } else { 183 // No prefix 184 fPrefixLength = 0; 185 fSuffixOffset = 2; 186 } 187 if (3 + fPrefixLength < fCompiledPattern.length()) { 188 // Found suffix 189 fSuffixLength = fCompiledPattern.charAt(fSuffixOffset) - ARG_NUM_LIMIT; 190 } else { 191 // No suffix 192 fSuffixLength = 0; 193 } 194 } 195 } 196 197 SimpleModifier::SimpleModifier() 198 : fField(kUndefinedField), fStrong(false), fPrefixLength(0), fSuffixLength(0) { 199 } 200 201 int32_t SimpleModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex, 202 UErrorCode &status) const { 203 return formatAsPrefixSuffix(output, leftIndex, rightIndex, status); 204 } 205 206 int32_t SimpleModifier::getPrefixLength() const { 207 return fPrefixLength; 208 } 209 210 int32_t SimpleModifier::getCodePointCount() const { 211 int32_t count = 0; 212 if (fPrefixLength > 0) { 213 count += fCompiledPattern.countChar32(2, fPrefixLength); 214 } 215 if (fSuffixLength > 0) { 216 count += fCompiledPattern.countChar32(1 + fSuffixOffset, fSuffixLength); 217 } 218 return count; 219 } 220 221 bool SimpleModifier::isStrong() const { 222 return fStrong; 223 } 224 225 bool SimpleModifier::containsField(Field field) const { 226 (void)field; 227 // This method is not currently used. 228 UPRV_UNREACHABLE_EXIT; 229 } 230 231 void SimpleModifier::getParameters(Parameters& output) const { 232 output = fParameters; 233 } 234 235 bool SimpleModifier::strictEquals(const Modifier& other) const { 236 const auto* _other = dynamic_cast<const SimpleModifier*>(&other); 237 if (_other == nullptr) { 238 return false; 239 } 240 return fCompiledPattern == _other->fCompiledPattern 241 && fField == _other->fField 242 && fStrong == _other->fStrong; 243 } 244 245 246 int32_t 247 SimpleModifier::formatAsPrefixSuffix(FormattedStringBuilder &result, int32_t startIndex, int32_t endIndex, 248 UErrorCode &status) const { 249 if (fSuffixOffset == -1 && fPrefixLength + fSuffixLength > 0) { 250 // There is no argument for the inner number; overwrite the entire segment with our string. 251 return result.splice(startIndex, endIndex, fCompiledPattern, 2, 2 + fPrefixLength, fField, status); 252 } else { 253 if (fPrefixLength > 0) { 254 result.insert(startIndex, fCompiledPattern, 2, 2 + fPrefixLength, fField, status); 255 } 256 if (fSuffixLength > 0) { 257 result.insert( 258 endIndex + fPrefixLength, 259 fCompiledPattern, 260 1 + fSuffixOffset, 261 1 + fSuffixOffset + fSuffixLength, 262 fField, 263 status); 264 } 265 return fPrefixLength + fSuffixLength; 266 } 267 } 268 269 270 int32_t 271 SimpleModifier::formatTwoArgPattern(const SimpleFormatter& compiled, FormattedStringBuilder& result, 272 int32_t index, int32_t* outPrefixLength, int32_t* outSuffixLength, 273 Field field, UErrorCode& status) { 274 const UnicodeString& compiledPattern = compiled.compiledPattern; 275 int32_t argLimit = SimpleFormatter::getArgumentLimit( 276 compiledPattern.getBuffer(), compiledPattern.length()); 277 if (argLimit != 2) { 278 status = U_INTERNAL_PROGRAM_ERROR; 279 return 0; 280 } 281 int32_t offset = 1; // offset into compiledPattern 282 int32_t length = 0; // chars added to result 283 284 int32_t prefixLength = compiledPattern.charAt(offset); 285 offset++; 286 if (prefixLength < ARG_NUM_LIMIT) { 287 // No prefix 288 prefixLength = 0; 289 } else { 290 prefixLength -= ARG_NUM_LIMIT; 291 result.insert(index + length, compiledPattern, offset, offset + prefixLength, field, status); 292 offset += prefixLength; 293 length += prefixLength; 294 offset++; 295 } 296 297 int32_t infixLength = compiledPattern.charAt(offset); 298 offset++; 299 if (infixLength < ARG_NUM_LIMIT) { 300 // No infix 301 infixLength = 0; 302 } else { 303 infixLength -= ARG_NUM_LIMIT; 304 result.insert(index + length, compiledPattern, offset, offset + infixLength, field, status); 305 offset += infixLength; 306 length += infixLength; 307 offset++; 308 } 309 310 int32_t suffixLength; 311 if (offset == compiledPattern.length()) { 312 // No suffix 313 suffixLength = 0; 314 } else { 315 suffixLength = compiledPattern.charAt(offset) - ARG_NUM_LIMIT; 316 offset++; 317 result.insert(index + length, compiledPattern, offset, offset + suffixLength, field, status); 318 length += suffixLength; 319 } 320 321 *outPrefixLength = prefixLength; 322 *outSuffixLength = suffixLength; 323 324 return length; 325 } 326 327 328 int32_t ConstantMultiFieldModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex, 329 UErrorCode &status) const { 330 int32_t length = output.insert(leftIndex, fPrefix, status); 331 if (fOverwrite) { 332 length += output.splice( 333 leftIndex + length, 334 rightIndex + length, 335 UnicodeString(), 0, 0, 336 kUndefinedField, status); 337 } 338 length += output.insert(rightIndex + length, fSuffix, status); 339 return length; 340 } 341 342 int32_t ConstantMultiFieldModifier::getPrefixLength() const { 343 return fPrefix.length(); 344 } 345 346 int32_t ConstantMultiFieldModifier::getCodePointCount() const { 347 return fPrefix.codePointCount() + fSuffix.codePointCount(); 348 } 349 350 bool ConstantMultiFieldModifier::isStrong() const { 351 return fStrong; 352 } 353 354 bool ConstantMultiFieldModifier::containsField(Field field) const { 355 return fPrefix.containsField(field) || fSuffix.containsField(field); 356 } 357 358 void ConstantMultiFieldModifier::getParameters(Parameters& output) const { 359 output = fParameters; 360 } 361 362 bool ConstantMultiFieldModifier::strictEquals(const Modifier& other) const { 363 const auto* _other = dynamic_cast<const ConstantMultiFieldModifier*>(&other); 364 if (_other == nullptr) { 365 return false; 366 } 367 return fPrefix.contentEquals(_other->fPrefix) 368 && fSuffix.contentEquals(_other->fSuffix) 369 && fOverwrite == _other->fOverwrite 370 && fStrong == _other->fStrong; 371 } 372 373 374 CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const FormattedStringBuilder &prefix, 375 const FormattedStringBuilder &suffix, 376 bool overwrite, 377 bool strong, 378 const DecimalFormatSymbols &symbols, 379 UErrorCode &status) 380 : ConstantMultiFieldModifier(prefix, suffix, overwrite, strong) { 381 // Check for currency spacing. Do not build the UnicodeSets unless there is 382 // a currency code point at a boundary. 383 if (prefix.length() > 0 && prefix.fieldAt(prefix.length() - 1) == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) { 384 int prefixCp = prefix.getLastCodePoint(); 385 UnicodeSet prefixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, PREFIX, status); 386 if (prefixUnicodeSet.contains(prefixCp)) { 387 fAfterPrefixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, PREFIX, status); 388 fAfterPrefixUnicodeSet.freeze(); 389 fAfterPrefixInsert = getInsertString(symbols, PREFIX, status); 390 } else { 391 fAfterPrefixUnicodeSet.setToBogus(); 392 fAfterPrefixInsert.setToBogus(); 393 } 394 } else { 395 fAfterPrefixUnicodeSet.setToBogus(); 396 fAfterPrefixInsert.setToBogus(); 397 } 398 if (suffix.length() > 0 && suffix.fieldAt(0) == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) { 399 int suffixCp = suffix.getFirstCodePoint(); 400 UnicodeSet suffixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, SUFFIX, status); 401 if (suffixUnicodeSet.contains(suffixCp)) { 402 fBeforeSuffixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, SUFFIX, status); 403 fBeforeSuffixUnicodeSet.freeze(); 404 fBeforeSuffixInsert = getInsertString(symbols, SUFFIX, status); 405 } else { 406 fBeforeSuffixUnicodeSet.setToBogus(); 407 fBeforeSuffixInsert.setToBogus(); 408 } 409 } else { 410 fBeforeSuffixUnicodeSet.setToBogus(); 411 fBeforeSuffixInsert.setToBogus(); 412 } 413 } 414 415 int32_t CurrencySpacingEnabledModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex, 416 UErrorCode &status) const { 417 // Currency spacing logic 418 int length = 0; 419 if (rightIndex - leftIndex > 0 && !fAfterPrefixUnicodeSet.isBogus() && 420 fAfterPrefixUnicodeSet.contains(output.codePointAt(leftIndex))) { 421 // TODO: Should we use the CURRENCY field here? 422 length += output.insert( 423 leftIndex, 424 fAfterPrefixInsert, 425 kUndefinedField, 426 status); 427 } 428 if (rightIndex - leftIndex > 0 && !fBeforeSuffixUnicodeSet.isBogus() && 429 fBeforeSuffixUnicodeSet.contains(output.codePointBefore(rightIndex))) { 430 // TODO: Should we use the CURRENCY field here? 431 length += output.insert( 432 rightIndex + length, 433 fBeforeSuffixInsert, 434 kUndefinedField, 435 status); 436 } 437 438 // Call super for the remaining logic 439 length += ConstantMultiFieldModifier::apply(output, leftIndex, rightIndex + length, status); 440 return length; 441 } 442 443 int32_t 444 CurrencySpacingEnabledModifier::applyCurrencySpacing(FormattedStringBuilder &output, int32_t prefixStart, 445 int32_t prefixLen, int32_t suffixStart, 446 int32_t suffixLen, 447 const DecimalFormatSymbols &symbols, 448 UErrorCode &status) { 449 int length = 0; 450 bool hasPrefix = (prefixLen > 0); 451 bool hasSuffix = (suffixLen > 0); 452 bool hasNumber = (suffixStart - prefixStart - prefixLen > 0); // could be empty string 453 if (hasPrefix && hasNumber) { 454 length += applyCurrencySpacingAffix(output, prefixStart + prefixLen, PREFIX, symbols, status); 455 } 456 if (hasSuffix && hasNumber) { 457 length += applyCurrencySpacingAffix(output, suffixStart + length, SUFFIX, symbols, status); 458 } 459 return length; 460 } 461 462 int32_t 463 CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(FormattedStringBuilder &output, int32_t index, 464 EAffix affix, 465 const DecimalFormatSymbols &symbols, 466 UErrorCode &status) { 467 // NOTE: For prefix, output.fieldAt(index-1) gets the last field type in the prefix. 468 // This works even if the last code point in the prefix is 2 code units because the 469 // field value gets populated to both indices in the field array. 470 Field affixField = (affix == PREFIX) ? output.fieldAt(index - 1) : output.fieldAt(index); 471 if (affixField != Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) { 472 return 0; 473 } 474 int affixCp = (affix == PREFIX) ? output.codePointBefore(index) : output.codePointAt(index); 475 UnicodeSet affixUniset = getUnicodeSet(symbols, IN_CURRENCY, affix, status); 476 if (!affixUniset.contains(affixCp)) { 477 return 0; 478 } 479 int numberCp = (affix == PREFIX) ? output.codePointAt(index) : output.codePointBefore(index); 480 UnicodeSet numberUniset = getUnicodeSet(symbols, IN_NUMBER, affix, status); 481 if (!numberUniset.contains(numberCp)) { 482 return 0; 483 } 484 UnicodeString spacingString = getInsertString(symbols, affix, status); 485 486 // NOTE: This next line *inserts* the spacing string, triggering an arraycopy. 487 // It would be more efficient if this could be done before affixes were attached, 488 // so that it could be prepended/appended instead of inserted. 489 // However, the build code path is more efficient, and this is the most natural 490 // place to put currency spacing in the non-build code path. 491 // TODO: Should we use the CURRENCY field here? 492 return output.insert(index, spacingString, kUndefinedField, status); 493 } 494 495 UnicodeSet 496 CurrencySpacingEnabledModifier::getUnicodeSet(const DecimalFormatSymbols &symbols, EPosition position, 497 EAffix affix, UErrorCode &status) { 498 // Ensure the static defaults are initialized: 499 umtx_initOnce(gDefaultCurrencySpacingInitOnce, &initDefaultCurrencySpacing, status); 500 if (U_FAILURE(status)) { 501 return {}; 502 } 503 504 const UnicodeString& pattern = symbols.getPatternForCurrencySpacing( 505 position == IN_CURRENCY ? UNUM_CURRENCY_MATCH : UNUM_CURRENCY_SURROUNDING_MATCH, 506 affix == SUFFIX, 507 status); 508 if (pattern.compare(u"[:digit:]", -1) == 0) { 509 return *UNISET_DIGIT; 510 } else if (pattern.compare(u"[[:^S:]&[:^Z:]]", -1) == 0) { 511 return *UNISET_NOTSZ; 512 } else { 513 return UnicodeSet(pattern, status); 514 } 515 } 516 517 UnicodeString 518 CurrencySpacingEnabledModifier::getInsertString(const DecimalFormatSymbols &symbols, EAffix affix, 519 UErrorCode &status) { 520 return symbols.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, affix == SUFFIX, status); 521 } 522 523 #endif /* #if !UCONFIG_NO_FORMATTING */