number_patternstring.cpp (45032B)
1 // © 2017 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_FORMATTING 7 8 // Allow implicit conversion from char16_t* to UnicodeString for this file: 9 // Helpful in toString methods and elsewhere. 10 #define UNISTR_FROM_STRING_EXPLICIT 11 #define UNISTR_FROM_CHAR_EXPLICIT 12 13 #include "uassert.h" 14 #include "number_patternstring.h" 15 #include "unicode/utf16.h" 16 #include "number_utils.h" 17 #include "number_roundingutils.h" 18 #include "number_mapper.h" 19 20 using namespace icu; 21 using namespace icu::number; 22 using namespace icu::number::impl; 23 24 25 void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo, 26 UErrorCode& status) { 27 patternInfo.consumePattern(patternString, status); 28 } 29 30 DecimalFormatProperties 31 PatternParser::parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding, 32 UErrorCode& status) { 33 DecimalFormatProperties properties; 34 parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status); 35 return properties; 36 } 37 38 DecimalFormatProperties PatternParser::parseToProperties(const UnicodeString& pattern, 39 UErrorCode& status) { 40 return parseToProperties(pattern, IGNORE_ROUNDING_NEVER, status); 41 } 42 43 void 44 PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties, 45 IgnoreRounding ignoreRounding, UErrorCode& status) { 46 parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status); 47 } 48 49 50 char16_t ParsedPatternInfo::charAt(int32_t flags, int32_t index) const { 51 const Endpoints& endpoints = getEndpoints(flags); 52 if (index < 0 || index >= endpoints.end - endpoints.start) { 53 UPRV_UNREACHABLE_EXIT; 54 } 55 return pattern.charAt(endpoints.start + index); 56 } 57 58 int32_t ParsedPatternInfo::length(int32_t flags) const { 59 return 
getLengthFromEndpoints(getEndpoints(flags)); 60 } 61 62 int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints& endpoints) { 63 return endpoints.end - endpoints.start; 64 } 65 66 UnicodeString ParsedPatternInfo::getString(int32_t flags) const { 67 const Endpoints& endpoints = getEndpoints(flags); 68 if (endpoints.start == endpoints.end) { 69 return {}; 70 } 71 // Create a new UnicodeString 72 return UnicodeString(pattern, endpoints.start, endpoints.end - endpoints.start); 73 } 74 75 const Endpoints& ParsedPatternInfo::getEndpoints(int32_t flags) const { 76 bool prefix = (flags & AFFIX_PREFIX) != 0; 77 bool isNegative = (flags & AFFIX_NEGATIVE_SUBPATTERN) != 0; 78 bool padding = (flags & AFFIX_PADDING) != 0; 79 if (isNegative && padding) { 80 return negative.paddingEndpoints; 81 } else if (padding) { 82 return positive.paddingEndpoints; 83 } else if (prefix && isNegative) { 84 return negative.prefixEndpoints; 85 } else if (prefix) { 86 return positive.prefixEndpoints; 87 } else if (isNegative) { 88 return negative.suffixEndpoints; 89 } else { 90 return positive.suffixEndpoints; 91 } 92 } 93 94 bool ParsedPatternInfo::positiveHasPlusSign() const { 95 return positive.hasPlusSign; 96 } 97 98 bool ParsedPatternInfo::hasNegativeSubpattern() const { 99 return fHasNegativeSubpattern; 100 } 101 102 bool ParsedPatternInfo::negativeHasMinusSign() const { 103 return negative.hasMinusSign; 104 } 105 106 bool ParsedPatternInfo::hasCurrencySign() const { 107 return positive.hasCurrencySign || (fHasNegativeSubpattern && negative.hasCurrencySign); 108 } 109 110 bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode& status) const { 111 return AffixUtils::containsType(pattern, type, status); 112 } 113 114 bool ParsedPatternInfo::hasBody() const { 115 return positive.integerTotal > 0; 116 } 117 118 bool ParsedPatternInfo::currencyAsDecimal() const { 119 return positive.hasCurrencyDecimal; 120 } 121 122 
/////////////////////////////////////////////////////
/// BEGIN RECURSIVE DESCENT PARSER IMPLEMENTATION ///
/////////////////////////////////////////////////////

// Returns the code point at the current parse offset without consuming it,
// or -1 when the offset equals the pattern length (end of input).
UChar32 ParsedPatternInfo::ParserState::peek() {
    if (offset == pattern.length()) {
        return -1;
    } else {
        return pattern.char32At(offset);
    }
}

// Returns the code point that follows the current one, without consuming anything.
// Returns -1 if there is no current code point or no code point after it.
UChar32 ParsedPatternInfo::ParserState::peek2() {
    if (offset == pattern.length()) {
        return -1;
    }
    int32_t cp1 = pattern.char32At(offset);
    // Step over the current code point (one or two UTF-16 code units).
    int32_t offset2 = offset + U16_LENGTH(cp1);
    if (offset2 == pattern.length()) {
        return -1;
    }
    return pattern.char32At(offset2);
}

// Returns the same value as peek() and advances the offset past it.
// NOTE(review): the offset is advanced unconditionally, including when peek() returns -1.
// Every call site visible in this file checks peek() first, so that case does not
// appear to be reached.
UChar32 ParsedPatternInfo::ParserState::next() {
    int32_t codePoint = peek();
    offset += U16_LENGTH(codePoint);
    return codePoint;
}

// Entry point of the parser: stores patternString and parses it into the
// positive (and, if present, negative) subpattern records.
// Sets status to U_UNQUOTED_SPECIAL if input remains after the subpattern(s);
// other errors are set by the consume* helpers.
void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode& status) {
    if (U_FAILURE(status)) { return; }
    this->pattern = patternString;

    // This class is not intended for writing twice!
    // Use move assignment to overwrite instead.
    U_ASSERT(state.offset == 0);

    // pattern := subpattern (';' subpattern)?
    currentSubpattern = &positive;
    consumeSubpattern(status);
    if (U_FAILURE(status)) { return; }
    if (state.peek() == u';') {
        state.next(); // consume the ';'
        // Don't consume the negative subpattern if it is empty (trailing ';')
        if (state.peek() != -1) {
            fHasNegativeSubpattern = true;
            currentSubpattern = &negative;
            consumeSubpattern(status);
            if (U_FAILURE(status)) { return; }
        }
    }
    // Anything left over at this point could not be consumed by the grammar.
    if (state.peek() != -1) {
        state.toParseException(u"Found unquoted special character");
        status = U_UNQUOTED_SPECIAL;
    }
}

// Parses one subpattern into *currentSubpattern.
// A pad specifier is tried at each of the four pad positions, in order, around the
// prefix, the number format, the optional exponent and the suffix.
// Returns as soon as any step reports a failure in status.
void ParsedPatternInfo::consumeSubpattern(UErrorCode& status) {
    // subpattern := literals? number exponent? literals?
    consumePadding(PadPosition::UNUM_PAD_BEFORE_PREFIX, status);
    if (U_FAILURE(status)) { return; }
    consumeAffix(currentSubpattern->prefixEndpoints, status);
    if (U_FAILURE(status)) { return; }
    consumePadding(PadPosition::UNUM_PAD_AFTER_PREFIX, status);
    if (U_FAILURE(status)) { return; }
    consumeFormat(status);
    if (U_FAILURE(status)) { return; }
    consumeExponent(status);
    if (U_FAILURE(status)) { return; }
    consumePadding(PadPosition::UNUM_PAD_BEFORE_SUFFIX, status);
    if (U_FAILURE(status)) { return; }
    consumeAffix(currentSubpattern->suffixEndpoints, status);
    if (U_FAILURE(status)) { return; }
    consumePadding(PadPosition::UNUM_PAD_AFTER_SUFFIX, status);
    if (U_FAILURE(status)) { return; }
}

// Consumes an optional pad specifier: '*' followed by one literal.
// Does nothing if the next code point is not '*'.
// Records paddingLocation and the endpoints of the literal following the '*'.
// Sets status to U_MULTIPLE_PAD_SPECIFIERS if this subpattern already has padding.
void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode& status) {
    if (state.peek() != u'*') {
        return;
    }
    if (currentSubpattern->hasPadding) {
        state.toParseException(u"Cannot have multiple pad specifiers");
        status = U_MULTIPLE_PAD_SPECIFIERS;
        return;
    }
    currentSubpattern->paddingLocation = paddingLocation;
    currentSubpattern->hasPadding = true;
    state.next(); // consume the '*'
    currentSubpattern->paddingEndpoints.start = state.offset;
    consumeLiteral(status);
    // The end offset is recorded even if consumeLiteral() failed; the caller checks status.
    currentSubpattern->paddingEndpoints.end = state.offset;
}

// Consumes a run of literals forming a prefix or suffix and records its extent in endpoints.
// Stops, without consuming, at any character that cannot appear unquoted in a literal
// (digits, '#', '@', ';', '*', '.', ',') or at end of input.
// Along the way, sets the subpattern flags for '%', per-mille, currency, '-' and '+'
// whenever one of those is the next code point at the start of a literal.
void ParsedPatternInfo::consumeAffix(Endpoints& endpoints, UErrorCode& status) {
    // literals := { literal }
    endpoints.start = state.offset;
    while (true) {
        switch (state.peek()) {
            case u'#':
            case u'@':
            case u';':
            case u'*':
            case u'.':
            case u',':
            case u'0':
            case u'1':
            case u'2':
            case u'3':
            case u'4':
            case u'5':
            case u'6':
            case u'7':
            case u'8':
            case u'9':
            case -1:
                // Characters that cannot appear unquoted in a literal
                // break outer;
                goto after_outer;

            case u'%':
                currentSubpattern->hasPercentSign = true;
                break;

            case u'‰':
                currentSubpattern->hasPerMilleSign = true;
                break;

            case u'¤':
                currentSubpattern->hasCurrencySign = true;
                break;

            case u'-':
                currentSubpattern->hasMinusSign = true;
                break;

            case u'+':
                currentSubpattern->hasPlusSign = true;
                break;

            default:
                break;
        }
        consumeLiteral(status);
        if (U_FAILURE(status)) { return; }
    }
    after_outer:
    endpoints.end = state.offset;
}

// Consumes a single literal: either one unquoted code point, or a quoted run
// from an opening ' through the matching closing '.
// Sets status to U_PATTERN_SYNTAX_ERROR if input ends before a literal starts
// or before a quoted run is closed.
void ParsedPatternInfo::consumeLiteral(UErrorCode& status) {
    if (state.peek() == -1) {
        state.toParseException(u"Expected unquoted literal but found EOL");
        status = U_PATTERN_SYNTAX_ERROR;
        return;
    } else if (state.peek() == u'\'') {
        state.next(); // consume the starting quote
        while (state.peek() != u'\'') {
            if (state.peek() == -1) {
                state.toParseException(u"Expected quoted literal but found EOL");
                status = U_PATTERN_SYNTAX_ERROR;
                return;
            } else {
                state.next(); // consume a quoted character
            }
        }
        state.next(); // consume the ending quote
    } else {
        // consume a non-quoted literal character
        state.next();
    }
}

// Consumes the number body: the integer part, then an optional decimal separator
// and fraction part. The separator is either '.' or a currency sign that is
// immediately followed by '#' or a digit.
void ParsedPatternInfo::consumeFormat(UErrorCode& status) {
    consumeIntegerFormat(status);
    if (U_FAILURE(status)) { return; }
    if (state.peek() == u'.') {
        state.next(); // consume the decimal point
        currentSubpattern->hasDecimal = true;
        currentSubpattern->widthExceptAffixes += 1;
        consumeFractionFormat(status);
        if (U_FAILURE(status)) { return; }
    } else if (state.peek() == u'¤') {
        // Check if currency is a decimal separator
        switch (state.peek2()) {
            case u'#':
            case u'0':
            case u'1':
            case u'2':
            case u'3':
            case u'4':
            case u'5':
            case u'6':
            case u'7':
            case u'8':
            case u'9':
                break;
            default:
                // Currency symbol followed by a non-numeric character;
                // treat as a normal affix.
                return;
        }
        // Currency symbol is followed by a numeric character;
        // treat as a decimal separator.
        currentSubpattern->hasCurrencySign = true;
        currentSubpattern->hasCurrencyDecimal = true;
        currentSubpattern->hasDecimal = true;
        currentSubpattern->widthExceptAffixes += 1;
        state.next(); // consume the symbol
        consumeFractionFormat(status);
        if (U_FAILURE(status)) { return; }
    }
}

// Consumes the integer part of the number body: any mix of ',', '#', '@' and digits,
// subject to the ordering rules enforced below. Updates the integer counters,
// widthExceptAffixes, the rounding digits and the packed groupingSizes field.
//
// groupingSizes is used as a stack of 16-bit counters: each digit-position symbol
// increments the lowest 16 bits, and each ',' shifts the whole value left by 16.
// After the loop the three lowest 16-bit fields are read back as int16_t and
// compared against 0 and -1.
// NOTE(review): -1 presumably marks a field that no ',' has pushed yet, coming from
// the initial value of groupingSizes, which is declared outside this file - confirm.
void ParsedPatternInfo::consumeIntegerFormat(UErrorCode& status) {
    // Convenience reference:
    ParsedSubpatternInfo& result = *currentSubpattern;

    while (true) {
        switch (state.peek()) {
            case u',':
                result.widthExceptAffixes += 1;
                result.groupingSizes <<= 16;
                break;

            case u'#':
                if (result.integerNumerals > 0) {
                    state.toParseException(u"# cannot follow 0 before decimal point");
                    status = U_UNEXPECTED_TOKEN;
                    return;
                }
                result.widthExceptAffixes += 1;
                result.groupingSizes += 1;
                // A '#' seen after an '@' counts as trailing; otherwise as leading.
                if (result.integerAtSigns > 0) {
                    result.integerTrailingHashSigns += 1;
                } else {
                    result.integerLeadingHashSigns += 1;
                }
                result.integerTotal += 1;
                break;

            case u'@':
                if (result.integerNumerals > 0) {
                    state.toParseException(u"Cannot mix 0 and @");
                    status = U_UNEXPECTED_TOKEN;
                    return;
                }
                if (result.integerTrailingHashSigns > 0) {
                    state.toParseException(u"Cannot nest # inside of a run of @");
                    status = U_UNEXPECTED_TOKEN;
                    return;
                }
                result.widthExceptAffixes += 1;
                result.groupingSizes += 1;
                result.integerAtSigns += 1;
                result.integerTotal += 1;
                break;

            case u'0':
            case u'1':
            case u'2':
            case u'3':
            case u'4':
            case u'5':
            case u'6':
            case u'7':
            case u'8':
            case u'9':
                if (result.integerAtSigns > 0) {
                    state.toParseException(u"Cannot mix @ and 0");
                    status = U_UNEXPECTED_TOKEN;
                    return;
                }
                result.widthExceptAffixes += 1;
                result.groupingSizes += 1;
                result.integerNumerals += 1;
                result.integerTotal += 1;
                // Leading zeros are not appended to the rounding value; once it is
                // non-zero, every digit (including '0') is appended.
                if (!result.rounding.isZeroish() || state.peek() != u'0') {
                    result.rounding.appendDigit(static_cast<int8_t>(state.peek() - u'0'), 0, true);
                }
                break;

            default:
                goto after_outer;
        }
        state.next(); // consume the symbol
    }

    after_outer:
    // Disallow patterns with a trailing ',' or with two ',' next to each other
    auto grouping1 = static_cast<int16_t> (result.groupingSizes & 0xffff);
    auto grouping2 = static_cast<int16_t> ((result.groupingSizes >> 16) & 0xffff);
    auto grouping3 = static_cast<int16_t> ((result.groupingSizes >> 32) & 0xffff);
    if (grouping1 == 0 && grouping2 != -1) {
        state.toParseException(u"Trailing grouping separator is invalid");
        status = U_UNEXPECTED_TOKEN;
        return;
    }
    if (grouping2 == 0 && grouping3 != -1) {
        state.toParseException(u"Grouping width of zero is invalid");
        status = U_PATTERN_SYNTAX_ERROR;
        return;
    }
}

// Consumes the fraction part of the number body: digits followed by '#' symbols.
// A digit after a '#' is rejected with U_UNEXPECTED_TOKEN.
// zeroCounter accumulates the positions ('0' or '#') seen since the last non-zero
// digit; it is handed to rounding.appendDigit() with the next non-zero digit and
// then reset to 0.
void ParsedPatternInfo::consumeFractionFormat(UErrorCode& status) {
    // Convenience reference:
    ParsedSubpatternInfo& result = *currentSubpattern;

    int32_t zeroCounter = 0;
    while (true) {
        switch (state.peek()) {
            case u'#':
                result.widthExceptAffixes += 1;
                result.fractionHashSigns += 1;
                result.fractionTotal += 1;
                zeroCounter++;
                break;

            case u'0':
            case u'1':
            case u'2':
            case u'3':
            case u'4':
            case u'5':
            case u'6':
            case u'7':
            case u'8':
            case u'9':
                if (result.fractionHashSigns > 0) {
                    state.toParseException(u"0 cannot follow # after decimal point");
                    status = U_UNEXPECTED_TOKEN;
                    return;
                }
                result.widthExceptAffixes += 1;
                result.fractionNumerals += 1;
                result.fractionTotal += 1;
                if (state.peek() == u'0') {
                    zeroCounter++;
                } else {
                    result.rounding
                            .appendDigit(static_cast<int8_t>(state.peek() - u'0'), zeroCounter, false);
                    zeroCounter = 0;
                }
                break;

            default:
                return;
        }
        state.next(); // consume the symbol
    }
}

// Consumes an optional exponent: 'E', an optional '+', then zero or more '0'.
// Does nothing if the next code point is not 'E'.
// Sets status to U_MALFORMED_EXPONENTIAL_PATTERN if bits 16..31 of groupingSizes
// are not all ones.
// NOTE(review): presumably that field is all ones only while no ',' has been
// consumed for this subpattern - confirm against the initial value of groupingSizes.
void ParsedPatternInfo::consumeExponent(UErrorCode& status) {
    // Convenience reference:
    ParsedSubpatternInfo& result = *currentSubpattern;

    if (state.peek() != u'E') {
        return;
    }
    if ((result.groupingSizes & 0xffff0000L) != 0xffff0000L) {
        state.toParseException(u"Cannot have grouping separator in scientific notation");
        status = U_MALFORMED_EXPONENTIAL_PATTERN;
        return;
    }
    state.next(); // consume the E
    result.widthExceptAffixes++;
    if (state.peek() == u'+') {
        state.next(); // consume the +
        result.exponentHasPlusSign = true;
        result.widthExceptAffixes++;
    }
    while (state.peek() == u'0') {
        state.next(); // consume the 0
        result.exponentZeros += 1;
        result.widthExceptAffixes++;
    }
}

///////////////////////////////////////////////////
/// END RECURSIVE DESCENT PARSER IMPLEMENTATION ///
///////////////////////////////////////////////////

// Shared implementation behind parseToProperties() and parseToExistingProperties().
// An empty pattern clears properties and returns without parsing.
// Otherwise the pattern is parsed into a local ParsedPatternInfo and, if parsing
// succeeded, translated into properties. On a parse failure properties is not modified.
void PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern,
                                                  DecimalFormatProperties& properties,
                                                  IgnoreRounding ignoreRounding, UErrorCode& status) {
    if (pattern.length() == 0) {
        // Backwards compatibility requires that we reset to the default values.
        // TODO: Only overwrite the properties that "saveToProperties" normally touches?
        properties.clear();
        return;
    }

    ParsedPatternInfo patternInfo;
    parseToPatternInfo(pattern, patternInfo, status);
    if (U_FAILURE(status)) { return; }
    patternInfoToProperties(properties, patternInfo, ignoreRounding, status);
}

// Translates a parsed pattern into DecimalFormatProperties.
// Writes grouping, min/max digit counts, rounding increment, significant digits,
// decimal-separator behavior, scientific-notation settings, padding, the
// positive/negative affix patterns and the magnitude multiplier.
// Except for the negative affix patterns, every value is taken from the positive subpattern.
// _ignoreRounding controls whether the fraction-digit and rounding-increment
// properties are taken from the pattern or reset to -1 / 0.0.
// status is only passed to AffixUtils::estimateLength() when padding is present.
void
PatternParser::patternInfoToProperties(DecimalFormatProperties& properties, ParsedPatternInfo& patternInfo,
                                       IgnoreRounding _ignoreRounding, UErrorCode& status) {
    // Translate from PatternParseResult to Properties.
    // Note that most data from "negative" is ignored per the specification of DecimalFormat.

    const ParsedSubpatternInfo& positive = patternInfo.positive;

    // Resolve the three-valued enum into a boolean for this pattern.
    bool ignoreRounding;
    if (_ignoreRounding == IGNORE_ROUNDING_NEVER) {
        ignoreRounding = false;
    } else if (_ignoreRounding == IGNORE_ROUNDING_IF_CURRENCY) {
        ignoreRounding = positive.hasCurrencySign;
    } else {
        U_ASSERT(_ignoreRounding == IGNORE_ROUNDING_ALWAYS);
        ignoreRounding = true;
    }

    // Grouping settings
    // Unpack the three lowest 16-bit fields of groupingSizes (see consumeIntegerFormat).
    // A field equal to -1 is treated as "not present".
    auto grouping1 = static_cast<int16_t> (positive.groupingSizes & 0xffff);
    auto grouping2 = static_cast<int16_t> ((positive.groupingSizes >> 16) & 0xffff);
    auto grouping3 = static_cast<int16_t> ((positive.groupingSizes >> 32) & 0xffff);
    if (grouping2 != -1) {
        properties.groupingSize = grouping1;
        properties.groupingUsed = true;
    } else {
        properties.groupingSize = -1;
        properties.groupingUsed = false;
    }
    if (grouping3 != -1) {
        properties.secondaryGroupingSize = grouping2;
    } else {
        properties.secondaryGroupingSize = -1;
    }

    // For backwards compatibility, require that the pattern emit at least one min digit.
    int minInt, minFrac;
    if (positive.integerTotal == 0 && positive.fractionTotal > 0) {
        // patterns like ".##"
        minInt = 0;
        minFrac = uprv_max(1, positive.fractionNumerals);
    } else if (positive.integerNumerals == 0 && positive.fractionNumerals == 0) {
        // patterns like "#.##"
        minInt = 1;
        minFrac = 0;
    } else {
        minInt = positive.integerNumerals;
        minFrac = positive.fractionNumerals;
    }

    // Rounding settings
    // Don't set basic rounding when there is a currency sign; defer to CurrencyUsage
    if (positive.integerAtSigns > 0) {
        // Significant-digits pattern ('@'): fraction limits and increment are unset.
        properties.minimumFractionDigits = -1;
        properties.maximumFractionDigits = -1;
        properties.roundingIncrement = 0.0;
        properties.minimumSignificantDigits = positive.integerAtSigns;
        properties.maximumSignificantDigits = positive.integerAtSigns + positive.integerTrailingHashSigns;
    } else if (!positive.rounding.isZeroish()) {
        // Pattern contains non-zero digits: they define a rounding increment.
        if (!ignoreRounding) {
            properties.minimumFractionDigits = minFrac;
            properties.maximumFractionDigits = positive.fractionTotal;
            properties.roundingIncrement = positive.rounding.toDouble();
        } else {
            properties.minimumFractionDigits = -1;
            properties.maximumFractionDigits = -1;
            properties.roundingIncrement = 0.0;
        }
        properties.minimumSignificantDigits = -1;
        properties.maximumSignificantDigits = -1;
    } else {
        // Plain pattern: fraction limits only, no rounding increment.
        if (!ignoreRounding) {
            properties.minimumFractionDigits = minFrac;
            properties.maximumFractionDigits = positive.fractionTotal;
            properties.roundingIncrement = 0.0;
        } else {
            properties.minimumFractionDigits = -1;
            properties.maximumFractionDigits = -1;
            properties.roundingIncrement = 0.0;
        }
        properties.minimumSignificantDigits = -1;
        properties.maximumSignificantDigits = -1;
    }

    // If the pattern ends with a '.' then force the decimal point.
    if (positive.hasDecimal && positive.fractionTotal == 0) {
        properties.decimalSeparatorAlwaysShown = true;
    } else {
        properties.decimalSeparatorAlwaysShown = false;
    }

    // Persist the currency as decimal separator
    properties.currencyAsDecimal = positive.hasCurrencyDecimal;

    // Scientific notation settings
    if (positive.exponentZeros > 0) {
        properties.exponentSignAlwaysShown = positive.exponentHasPlusSign;
        properties.minimumExponentDigits = positive.exponentZeros;
        if (positive.integerAtSigns == 0) {
            // patterns without '@' can define max integer digits, used for engineering notation
            properties.minimumIntegerDigits = positive.integerNumerals;
            properties.maximumIntegerDigits = positive.integerTotal;
        } else {
            // patterns with '@' cannot define max integer digits
            properties.minimumIntegerDigits = 1;
            properties.maximumIntegerDigits = -1;
        }
    } else {
        properties.exponentSignAlwaysShown = false;
        properties.minimumExponentDigits = -1;
        properties.minimumIntegerDigits = minInt;
        properties.maximumIntegerDigits = -1;
    }

    // Compute the affix patterns (required for both padding and affixes)
    UnicodeString posPrefix = patternInfo.getString(AffixPatternProvider::AFFIX_PREFIX);
    UnicodeString posSuffix = patternInfo.getString(0);

    // Padding settings
    if (positive.hasPadding) {
        // The width of the positive prefix and suffix templates are included in the padding
        int paddingWidth = positive.widthExceptAffixes +
                           AffixUtils::estimateLength(posPrefix, status) +
                           AffixUtils::estimateLength(posSuffix, status);
        properties.formatWidth = paddingWidth;
        // rawPaddingString is the literal after '*' exactly as written, quotes included.
        UnicodeString rawPaddingString = patternInfo.getString(AffixPatternProvider::AFFIX_PADDING);
        if (rawPaddingString.length() == 1) {
            properties.padString = rawPaddingString;
        } else if (rawPaddingString.length() == 2) {
            // Two code units: if it starts with a quote, the pad string becomes a single
            // quote character; otherwise the two code units are used as-is.
            if (rawPaddingString.charAt(0) == u'\'') {
                properties.padString.setTo(u"'", -1);
            } else {
                properties.padString = rawPaddingString;
            }
        } else {
            // Longer literal: drop the first and last code units (the surrounding quotes).
            properties.padString = UnicodeString(rawPaddingString, 1, rawPaddingString.length() - 2);
        }
        properties.padPosition = positive.paddingLocation;
    } else {
        properties.formatWidth = -1;
        properties.padString.setToBogus();
        properties.padPosition.nullify();
    }

    // Set the affixes
    // Always call the setter, even if the prefixes are empty, especially in the case of the
    // negative prefix pattern, to prevent default values from overriding the pattern.
    properties.positivePrefixPattern = posPrefix;
    properties.positiveSuffixPattern = posSuffix;
    if (patternInfo.fHasNegativeSubpattern) {
        properties.negativePrefixPattern = patternInfo.getString(
                AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN | AffixPatternProvider::AFFIX_PREFIX);
        properties.negativeSuffixPattern = patternInfo.getString(
                AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN);
    } else {
        properties.negativePrefixPattern.setToBogus();
        properties.negativeSuffixPattern.setToBogus();
    }

    // Set the magnitude multiplier
    // Percent takes precedence over per-mille when both flags are set.
    if (positive.hasPercentSign) {
        properties.magnitudeMultiplier = 2;
    } else if (positive.hasPerMilleSign) {
        properties.magnitudeMultiplier = 3;
    } else {
        properties.magnitudeMultiplier = 0;
    }
}

///////////////////////////////////////////////////////////////////
/// End PatternStringParser.java; begin PatternStringUtils.java ///
///////////////////////////////////////////////////////////////////

// Determine whether a given roundingIncrement should be ignored for formatting
// based on the current maxFrac value (maximum fraction digits). For example a
// roundingIncrement of 0.01 should be ignored if maxFrac is 1, but not if maxFrac
// is 2 or more.
Note that roundingIncrements are rounded in significance, so 699 // a roundingIncrement of 0.006 is treated like 0.01 for this determination, i.e. 700 // it should not be ignored if maxFrac is 2 or more (but a roundingIncrement of 701 // 0.005 is treated like 0.001 for significance). This is the reason for the 702 // initial doubling below. 703 // roundIncr must be non-zero. 704 bool PatternStringUtils::ignoreRoundingIncrement(double roundIncr, int32_t maxFrac) { 705 if (maxFrac < 0) { 706 return false; 707 } 708 int32_t frac = 0; 709 roundIncr *= 2.0; 710 for (frac = 0; frac <= maxFrac && roundIncr <= 1.0; frac++, roundIncr *= 10.0); 711 return (frac > maxFrac); 712 } 713 714 UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties& properties, 715 UErrorCode& status) { 716 UnicodeString sb; 717 718 // Convenience references 719 // The uprv_min() calls prevent DoS 720 int32_t dosMax = 100; 721 int32_t grouping1 = uprv_max(0, uprv_min(properties.groupingSize, dosMax)); 722 int32_t grouping2 = uprv_max(0, uprv_min(properties.secondaryGroupingSize, dosMax)); 723 bool useGrouping = properties.groupingUsed; 724 int32_t paddingWidth = uprv_min(properties.formatWidth, dosMax); 725 NullableValue<PadPosition> paddingLocation = properties.padPosition; 726 UnicodeString paddingString = properties.padString; 727 int32_t minInt = uprv_max(0, uprv_min(properties.minimumIntegerDigits, dosMax)); 728 int32_t maxInt = uprv_min(properties.maximumIntegerDigits, dosMax); 729 int32_t minFrac = uprv_max(0, uprv_min(properties.minimumFractionDigits, dosMax)); 730 int32_t maxFrac = uprv_min(properties.maximumFractionDigits, dosMax); 731 int32_t minSig = uprv_min(properties.minimumSignificantDigits, dosMax); 732 int32_t maxSig = uprv_min(properties.maximumSignificantDigits, dosMax); 733 bool alwaysShowDecimal = properties.decimalSeparatorAlwaysShown; 734 int32_t exponentDigits = uprv_min(properties.minimumExponentDigits, dosMax); 735 bool 
exponentShowPlusSign = properties.exponentSignAlwaysShown; 736 737 AutoAffixPatternProvider affixProvider(properties, status); 738 739 // Prefixes 740 sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_POS_PREFIX)); 741 int32_t afterPrefixPos = sb.length(); 742 743 // Figure out the grouping sizes. 744 if (!useGrouping) { 745 grouping1 = 0; 746 grouping2 = 0; 747 } else if (grouping1 == grouping2) { 748 grouping1 = 0; 749 } 750 int32_t groupingLength = grouping1 + grouping2 + 1; 751 752 // Figure out the digits we need to put in the pattern. 753 double increment = properties.roundingIncrement; 754 UnicodeString digitsString; 755 int32_t digitsStringScale = 0; 756 if (maxSig != uprv_min(dosMax, -1)) { 757 // Significant Digits. 758 while (digitsString.length() < minSig) { 759 digitsString.append(u'@'); 760 } 761 while (digitsString.length() < maxSig) { 762 digitsString.append(u'#'); 763 } 764 } else if (increment != 0.0 && !ignoreRoundingIncrement(increment,maxFrac)) { 765 // Rounding Increment. 766 DecimalQuantity incrementQuantity; 767 incrementQuantity.setToDouble(increment); 768 incrementQuantity.roundToInfinity(); 769 digitsStringScale = incrementQuantity.getLowerDisplayMagnitude(); 770 incrementQuantity.adjustMagnitude(-digitsStringScale); 771 incrementQuantity.increaseMinIntegerTo(minInt - digitsStringScale); 772 UnicodeString str = incrementQuantity.toPlainString(); 773 if (str.charAt(0) == u'-') { 774 // TODO: Unsupported operation exception or fail silently? 775 digitsString.append(str, 1, str.length() - 1); 776 } else { 777 digitsString.append(str); 778 } 779 } 780 while (digitsString.length() + digitsStringScale < minInt) { 781 digitsString.insert(0, u'0'); 782 } 783 while (-digitsStringScale < minFrac) { 784 digitsString.append(u'0'); 785 digitsStringScale--; 786 } 787 788 // Write the digits to the string builder 789 int32_t m0 = uprv_max(groupingLength, digitsString.length() + digitsStringScale); 790 m0 = (maxInt != dosMax) ? 
uprv_max(maxInt, m0) - 1 : m0 - 1; 791 int32_t mN = (maxFrac != dosMax) ? uprv_min(-maxFrac, digitsStringScale) : digitsStringScale; 792 for (int32_t magnitude = m0; magnitude >= mN; magnitude--) { 793 int32_t di = digitsString.length() + digitsStringScale - magnitude - 1; 794 if (di < 0 || di >= digitsString.length()) { 795 sb.append(u'#'); 796 } else { 797 sb.append(digitsString.charAt(di)); 798 } 799 // Decimal separator 800 if (magnitude == 0 && (alwaysShowDecimal || mN < 0)) { 801 if (properties.currencyAsDecimal) { 802 sb.append(u'¤'); 803 } else { 804 sb.append(u'.'); 805 } 806 } 807 if (!useGrouping) { 808 continue; 809 } 810 // Least-significant grouping separator 811 if (magnitude > 0 && magnitude == grouping1) { 812 sb.append(u','); 813 } 814 // All other grouping separators 815 if (magnitude > grouping1 && grouping2 > 0 && (magnitude - grouping1) % grouping2 == 0) { 816 sb.append(u','); 817 } 818 } 819 820 // Exponential notation 821 if (exponentDigits != uprv_min(dosMax, -1)) { 822 sb.append(u'E'); 823 if (exponentShowPlusSign) { 824 sb.append(u'+'); 825 } 826 for (int32_t i = 0; i < exponentDigits; i++) { 827 sb.append(u'0'); 828 } 829 } 830 831 // Suffixes 832 int32_t beforeSuffixPos = sb.length(); 833 sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_POS_SUFFIX)); 834 835 // Resolve Padding 836 if (paddingWidth > 0 && !paddingLocation.isNull()) { 837 while (paddingWidth - sb.length() > 0) { 838 sb.insert(afterPrefixPos, u'#'); 839 beforeSuffixPos++; 840 } 841 int32_t addedLength; 842 switch (paddingLocation.get(status)) { 843 case PadPosition::UNUM_PAD_BEFORE_PREFIX: 844 addedLength = escapePaddingString(paddingString, sb, 0, status); 845 sb.insert(0, u'*'); 846 afterPrefixPos += addedLength + 1; 847 beforeSuffixPos += addedLength + 1; 848 break; 849 case PadPosition::UNUM_PAD_AFTER_PREFIX: 850 addedLength = escapePaddingString(paddingString, sb, afterPrefixPos, status); 851 sb.insert(afterPrefixPos, u'*'); 852 afterPrefixPos += 
addedLength + 1; 853 beforeSuffixPos += addedLength + 1; 854 break; 855 case PadPosition::UNUM_PAD_BEFORE_SUFFIX: 856 escapePaddingString(paddingString, sb, beforeSuffixPos, status); 857 sb.insert(beforeSuffixPos, u'*'); 858 break; 859 case PadPosition::UNUM_PAD_AFTER_SUFFIX: 860 sb.append(u'*'); 861 escapePaddingString(paddingString, sb, sb.length(), status); 862 break; 863 } 864 if (U_FAILURE(status)) { return sb; } 865 } 866 867 // Negative affixes 868 // Ignore if the negative prefix pattern is "-" and the negative suffix is empty 869 if (affixProvider.get().hasNegativeSubpattern()) { 870 sb.append(u';'); 871 sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_NEG_PREFIX)); 872 // Copy the positive digit format into the negative. 873 // This is optional; the pattern is the same as if '#' were appended here instead. 874 // NOTE: It is not safe to append the UnicodeString to itself, so we need to copy. 875 // See https://unicode-org.atlassian.net/browse/ICU-13707 876 UnicodeString copy(sb); 877 sb.append(copy, afterPrefixPos, beforeSuffixPos - afterPrefixPos); 878 sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_NEG_SUFFIX)); 879 } 880 881 return sb; 882 } 883 884 int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex, 885 UErrorCode& status) { 886 (void) status; 887 if (input.length() == 0) { 888 input.setTo(kFallbackPaddingString, -1); 889 } 890 int startLength = output.length(); 891 if (input.length() == 1) { 892 if (input.compare(u"'", -1) == 0) { 893 output.insert(startIndex, u"''", -1); 894 } else { 895 output.insert(startIndex, input); 896 } 897 } else { 898 output.insert(startIndex, u'\''); 899 int offset = 1; 900 for (int i = 0; i < input.length(); i++) { 901 // it's okay to deal in chars here because the quote mark is the only interesting thing. 
902 char16_t ch = input.charAt(i); 903 if (ch == u'\'') { 904 output.insert(startIndex + offset, u"''", -1); 905 offset += 2; 906 } else { 907 output.insert(startIndex + offset, ch); 908 offset += 1; 909 } 910 } 911 output.insert(startIndex + offset, u'\''); 912 } 913 return output.length() - startLength; 914 } 915 916 UnicodeString 917 PatternStringUtils::convertLocalized(const UnicodeString& input, const DecimalFormatSymbols& symbols, 918 bool toLocalized, UErrorCode& status) { 919 // Construct a table of strings to be converted between localized and standard. 920 static constexpr int32_t LEN = 21; 921 UnicodeString table[LEN][2]; 922 int standIdx = toLocalized ? 0 : 1; 923 int localIdx = toLocalized ? 1 : 0; 924 // TODO: Add approximately sign here? 925 table[0][standIdx] = u"%"; 926 table[0][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol); 927 table[1][standIdx] = u"‰"; 928 table[1][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol); 929 table[2][standIdx] = u"."; 930 table[2][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol); 931 table[3][standIdx] = u","; 932 table[3][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol); 933 table[4][standIdx] = u"-"; 934 table[4][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol); 935 table[5][standIdx] = u"+"; 936 table[5][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol); 937 table[6][standIdx] = u";"; 938 table[6][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPatternSeparatorSymbol); 939 table[7][standIdx] = u"@"; 940 table[7][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kSignificantDigitSymbol); 941 table[8][standIdx] = u"E"; 942 table[8][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol); 943 table[9][standIdx] = u"*"; 944 table[9][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPadEscapeSymbol); 945 
table[10][standIdx] = u"#"; 946 table[10][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDigitSymbol); 947 for (int i = 0; i < 10; i++) { 948 table[11 + i][standIdx] = u'0' + i; 949 table[11 + i][localIdx] = symbols.getConstDigitSymbol(i); 950 } 951 952 // Special case: quotes are NOT allowed to be in any localIdx strings. 953 // Substitute them with '’' instead. 954 for (int32_t i = 0; i < LEN; i++) { 955 table[i][localIdx].findAndReplace(u'\'', u'’'); 956 } 957 958 // Iterate through the string and convert. 959 // State table: 960 // 0 => base state 961 // 1 => first char inside a quoted sequence in input and output string 962 // 2 => inside a quoted sequence in input and output string 963 // 3 => first char after a close quote in input string; 964 // close quote still needs to be written to output string 965 // 4 => base state in input string; inside quoted sequence in output string 966 // 5 => first char inside a quoted sequence in input string; 967 // inside quoted sequence in output string 968 UnicodeString result; 969 int state = 0; 970 for (int offset = 0; offset < input.length(); offset++) { 971 char16_t ch = input.charAt(offset); 972 973 // Handle a quote character (state shift) 974 if (ch == u'\'') { 975 if (state == 0) { 976 result.append(u'\''); 977 state = 1; 978 continue; 979 } else if (state == 1) { 980 result.append(u'\''); 981 state = 0; 982 continue; 983 } else if (state == 2) { 984 state = 3; 985 continue; 986 } else if (state == 3) { 987 result.append(u'\''); 988 result.append(u'\''); 989 state = 1; 990 continue; 991 } else if (state == 4) { 992 state = 5; 993 continue; 994 } else { 995 U_ASSERT(state == 5); 996 result.append(u'\''); 997 result.append(u'\''); 998 state = 4; 999 continue; 1000 } 1001 } 1002 1003 if (state == 0 || state == 3 || state == 4) { 1004 for (auto& pair : table) { 1005 // Perform a greedy match on this symbol string 1006 UnicodeString temp = input.tempSubString(offset, pair[0].length()); 1007 if (temp == 
pair[0]) { 1008 // Skip ahead past this region for the next iteration 1009 offset += pair[0].length() - 1; 1010 if (state == 3 || state == 4) { 1011 result.append(u'\''); 1012 state = 0; 1013 } 1014 result.append(pair[1]); 1015 goto continue_outer; 1016 } 1017 } 1018 // No replacement found. Check if a special quote is necessary 1019 for (auto& pair : table) { 1020 UnicodeString temp = input.tempSubString(offset, pair[1].length()); 1021 if (temp == pair[1]) { 1022 if (state == 0) { 1023 result.append(u'\''); 1024 state = 4; 1025 } 1026 result.append(ch); 1027 goto continue_outer; 1028 } 1029 } 1030 // Still nothing. Copy the char verbatim. (Add a close quote if necessary) 1031 if (state == 3 || state == 4) { 1032 result.append(u'\''); 1033 state = 0; 1034 } 1035 result.append(ch); 1036 } else { 1037 U_ASSERT(state == 1 || state == 2 || state == 5); 1038 result.append(ch); 1039 state = 2; 1040 } 1041 continue_outer:; 1042 } 1043 // Resolve final quotes 1044 if (state == 3 || state == 4) { 1045 result.append(u'\''); 1046 state = 0; 1047 } 1048 if (state != 0) { 1049 // Malformed localized pattern: unterminated quote 1050 status = U_PATTERN_SYNTAX_ERROR; 1051 } 1052 return result; 1053 } 1054 1055 void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix, 1056 PatternSignType patternSignType, 1057 bool approximately, 1058 StandardPlural::Form plural, 1059 bool perMilleReplacesPercent, 1060 bool dropCurrencySymbols, 1061 UnicodeString& output) { 1062 1063 // Should the output render '+' where '-' would normally appear in the pattern? 1064 bool plusReplacesMinusSign = (patternSignType == PATTERN_SIGN_TYPE_POS_SIGN) 1065 && !patternInfo.positiveHasPlusSign(); 1066 1067 // Should we use the affix from the negative subpattern? 1068 // (If not, we will use the positive subpattern.) 
1069 bool useNegativeAffixPattern = patternInfo.hasNegativeSubpattern() 1070 && (patternSignType == PATTERN_SIGN_TYPE_NEG 1071 || (patternInfo.negativeHasMinusSign() && (plusReplacesMinusSign || approximately))); 1072 1073 // Resolve the flags for the affix pattern. 1074 int flags = 0; 1075 if (useNegativeAffixPattern) { 1076 flags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN; 1077 } 1078 if (isPrefix) { 1079 flags |= AffixPatternProvider::AFFIX_PREFIX; 1080 } 1081 if (plural != StandardPlural::Form::COUNT) { 1082 U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural)); 1083 flags |= plural; 1084 } 1085 1086 // Should we prepend a sign to the pattern? 1087 bool prependSign; 1088 if (!isPrefix || useNegativeAffixPattern) { 1089 prependSign = false; 1090 } else if (patternSignType == PATTERN_SIGN_TYPE_NEG) { 1091 prependSign = true; 1092 } else { 1093 prependSign = plusReplacesMinusSign || approximately; 1094 } 1095 1096 // What symbols should take the place of the sign placeholder? 1097 const char16_t* signSymbols = u"-"; 1098 if (approximately) { 1099 if (plusReplacesMinusSign) { 1100 signSymbols = u"~+"; 1101 } else if (patternSignType == PATTERN_SIGN_TYPE_NEG) { 1102 signSymbols = u"~-"; 1103 } else { 1104 signSymbols = u"~"; 1105 } 1106 } else if (plusReplacesMinusSign) { 1107 signSymbols = u"+"; 1108 } 1109 1110 // Compute the number of tokens in the affix pattern (signSymbols is considered one token). 1111 int length = patternInfo.length(flags) + (prependSign ? 1 : 0); 1112 1113 // Finally, set the result into the StringBuilder. 
1114 output.remove(); 1115 for (int index = 0; index < length; index++) { 1116 char16_t candidate; 1117 if (prependSign && index == 0) { 1118 candidate = u'-'; 1119 } else if (prependSign) { 1120 candidate = patternInfo.charAt(flags, index - 1); 1121 } else { 1122 candidate = patternInfo.charAt(flags, index); 1123 } 1124 if (candidate == u'-') { 1125 if (u_strlen(signSymbols) == 1) { 1126 candidate = signSymbols[0]; 1127 } else { 1128 output.append(signSymbols[0]); 1129 candidate = signSymbols[1]; 1130 } 1131 } 1132 if (perMilleReplacesPercent && candidate == u'%') { 1133 candidate = u'‰'; 1134 } 1135 if (dropCurrencySymbols && candidate == u'\u00A4') { 1136 continue; 1137 } 1138 output.append(candidate); 1139 } 1140 } 1141 1142 PatternSignType PatternStringUtils::resolveSignDisplay(UNumberSignDisplay signDisplay, Signum signum) { 1143 switch (signDisplay) { 1144 case UNUM_SIGN_AUTO: 1145 case UNUM_SIGN_ACCOUNTING: 1146 switch (signum) { 1147 case SIGNUM_NEG: 1148 case SIGNUM_NEG_ZERO: 1149 return PATTERN_SIGN_TYPE_NEG; 1150 case SIGNUM_POS_ZERO: 1151 case SIGNUM_POS: 1152 return PATTERN_SIGN_TYPE_POS; 1153 default: 1154 break; 1155 } 1156 break; 1157 1158 case UNUM_SIGN_ALWAYS: 1159 case UNUM_SIGN_ACCOUNTING_ALWAYS: 1160 switch (signum) { 1161 case SIGNUM_NEG: 1162 case SIGNUM_NEG_ZERO: 1163 return PATTERN_SIGN_TYPE_NEG; 1164 case SIGNUM_POS_ZERO: 1165 case SIGNUM_POS: 1166 return PATTERN_SIGN_TYPE_POS_SIGN; 1167 default: 1168 break; 1169 } 1170 break; 1171 1172 case UNUM_SIGN_EXCEPT_ZERO: 1173 case UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO: 1174 switch (signum) { 1175 case SIGNUM_NEG: 1176 return PATTERN_SIGN_TYPE_NEG; 1177 case SIGNUM_NEG_ZERO: 1178 case SIGNUM_POS_ZERO: 1179 return PATTERN_SIGN_TYPE_POS; 1180 case SIGNUM_POS: 1181 return PATTERN_SIGN_TYPE_POS_SIGN; 1182 default: 1183 break; 1184 } 1185 break; 1186 1187 case UNUM_SIGN_NEGATIVE: 1188 case UNUM_SIGN_ACCOUNTING_NEGATIVE: 1189 switch (signum) { 1190 case SIGNUM_NEG: 1191 return PATTERN_SIGN_TYPE_NEG; 1192 
case SIGNUM_NEG_ZERO: 1193 case SIGNUM_POS_ZERO: 1194 case SIGNUM_POS: 1195 return PATTERN_SIGN_TYPE_POS; 1196 default: 1197 break; 1198 } 1199 break; 1200 1201 case UNUM_SIGN_NEVER: 1202 return PATTERN_SIGN_TYPE_POS; 1203 1204 default: 1205 break; 1206 } 1207 1208 UPRV_UNREACHABLE_EXIT; 1209 return PATTERN_SIGN_TYPE_POS; 1210 } 1211 1212 #endif /* #if !UCONFIG_NO_FORMATTING */