messageformat2_function_registry.cpp (67997B)
1 // © 2024 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_NORMALIZATION 7 8 #if !UCONFIG_NO_FORMATTING 9 10 #if !UCONFIG_NO_MF2 11 12 #include <math.h> 13 #include <cmath> 14 15 #include "unicode/dtptngen.h" 16 #include "unicode/messageformat2.h" 17 #include "unicode/messageformat2_data_model_names.h" 18 #include "unicode/messageformat2_function_registry.h" 19 #include "unicode/normalizer2.h" 20 #include "unicode/simpletz.h" 21 #include "unicode/smpdtfmt.h" 22 #include "charstr.h" 23 #include "double-conversion.h" 24 #include "messageformat2_allocation.h" 25 #include "messageformat2_function_registry_internal.h" 26 #include "messageformat2_macros.h" 27 #include "hash.h" 28 #include "mutex.h" 29 #include "number_types.h" 30 #include "ucln_in.h" 31 #include "uvector.h" // U_ASSERT 32 33 // The C99 standard suggested that C++ implementations not define PRId64 etc. constants 34 // unless this macro is defined. 35 // See the Notes at https://en.cppreference.com/w/cpp/types/integer . 36 // Similar to defining __STDC_LIMIT_MACROS in unicode/ptypes.h . 37 #ifndef __STDC_FORMAT_MACROS 38 # define __STDC_FORMAT_MACROS 39 #endif 40 #include <inttypes.h> 41 #include <math.h> 42 43 U_NAMESPACE_BEGIN 44 45 namespace message2 { 46 47 // Function registry implementation 48 49 Formatter::~Formatter() {} 50 Selector::~Selector() {} 51 FormatterFactory::~FormatterFactory() {} 52 SelectorFactory::~SelectorFactory() {} 53 54 MFFunctionRegistry MFFunctionRegistry::Builder::build() { 55 U_ASSERT(formatters != nullptr && selectors != nullptr && formattersByType != nullptr); 56 MFFunctionRegistry result = MFFunctionRegistry(formatters, selectors, formattersByType); 57 formatters = nullptr; 58 selectors = nullptr; 59 formattersByType = nullptr; 60 return result; 61 } 62 63 MFFunctionRegistry::Builder& MFFunctionRegistry::Builder::adoptSelector(const FunctionName& selectorName, SelectorFactory* selectorFactory, UErrorCode& errorCode) { 64 if (U_SUCCESS(errorCode)) { 65 U_ASSERT(selectors != nullptr); 66 selectors->put(selectorName, selectorFactory, errorCode); 67 } 68 return *this; 69 } 70 71 MFFunctionRegistry::Builder& MFFunctionRegistry::Builder::adoptFormatter(const FunctionName& formatterName, FormatterFactory* formatterFactory, UErrorCode& errorCode) { 72 if (U_SUCCESS(errorCode)) { 73 U_ASSERT(formatters != nullptr); 74 formatters->put(formatterName, formatterFactory, errorCode); 75 } 76 return *this; 77 } 78 79 MFFunctionRegistry::Builder& MFFunctionRegistry::Builder::setDefaultFormatterNameByType(const UnicodeString& type, const FunctionName& functionName, UErrorCode& errorCode) { 80 if (U_SUCCESS(errorCode)) { 81 U_ASSERT(formattersByType != nullptr); 82 FunctionName* f = create<FunctionName>(FunctionName(functionName), errorCode); 83 formattersByType->put(type, f, errorCode); 84 } 85 return *this; 86 } 87 88 MFFunctionRegistry::Builder::Builder(UErrorCode& errorCode) { 89 CHECK_ERROR(errorCode); 90 91 formatters = new Hashtable(); 92 selectors = new Hashtable(); 93 formattersByType = new Hashtable(); 94 if (!(formatters != nullptr && selectors != nullptr && formattersByType != nullptr)) { 95 errorCode = U_MEMORY_ALLOCATION_ERROR; 96 } else { 97 formatters->setValueDeleter(uprv_deleteUObject); 98 selectors->setValueDeleter(uprv_deleteUObject); 99 formattersByType->setValueDeleter(uprv_deleteUObject); 100 } 101 } 102 103 MFFunctionRegistry::Builder::~Builder() { 104 if (formatters != nullptr) { 105 delete formatters; 106 } 107 if (selectors != nullptr) { 108 delete selectors; 109 } 110 if (formattersByType != nullptr) { 111 delete formattersByType; 112 } 113 } 114 115 // Returns non-owned pointer. Returns pointer rather than reference because it can fail. 116 // Returns non-const because FormatterFactory is mutable. 117 // TODO: This is unsafe because of the cached-formatters map 118 // (the caller could delete the resulting pointer) 119 FormatterFactory* MFFunctionRegistry::getFormatter(const FunctionName& formatterName) const { 120 U_ASSERT(formatters != nullptr); 121 return static_cast<FormatterFactory*>(formatters->get(formatterName)); 122 } 123 124 UBool MFFunctionRegistry::getDefaultFormatterNameByType(const UnicodeString& type, FunctionName& name) const { 125 U_ASSERT(formatters != nullptr); 126 const FunctionName* f = static_cast<FunctionName*>(formattersByType->get(type)); 127 if (f != nullptr) { 128 name = *f; 129 return true; 130 } 131 return false; 132 } 133 134 const SelectorFactory* MFFunctionRegistry::getSelector(const FunctionName& selectorName) const { 135 U_ASSERT(selectors != nullptr); 136 return static_cast<const SelectorFactory*>(selectors->get(selectorName)); 137 } 138 139 bool MFFunctionRegistry::hasFormatter(const FunctionName& f) const { 140 return getFormatter(f) != nullptr; 141 } 142 143 bool MFFunctionRegistry::hasSelector(const FunctionName& s) const { 144 return getSelector(s) != nullptr; 145 } 146 147 void MFFunctionRegistry::checkFormatter(const char* s) const { 148 #if U_DEBUG 149 U_ASSERT(hasFormatter(FunctionName(UnicodeString(s)))); 150 #else 151 (void) s; 152 #endif 153 } 154 155 void MFFunctionRegistry::checkSelector(const char* s) const { 156 #if U_DEBUG 157 U_ASSERT(hasSelector(FunctionName(UnicodeString(s)))); 158 #else 159 (void) s; 160 #endif 161 } 162 163 // Debugging 164 void MFFunctionRegistry::checkStandard() const { 165 checkFormatter("datetime"); 166 checkFormatter("date"); 167 checkFormatter("time"); 168 checkFormatter("number"); 169 checkFormatter("integer"); 170 checkFormatter("test:function"); 171 checkFormatter("test:format"); 172 checkSelector("number"); 173 checkSelector("integer"); 174 checkSelector("string"); 175 checkSelector("test:function"); 176 checkSelector("test:select"); 177 } 178 179 // Formatter/selector helpers 180 181 // Returns the NFC-normalized version of s, returning s itself 182 // if it's already normalized. 183 /* static */ UnicodeString StandardFunctions::normalizeNFC(const UnicodeString& s) { 184 UErrorCode status = U_ZERO_ERROR; 185 const Normalizer2* nfcNormalizer = Normalizer2::getNFCInstance(status); 186 if (U_FAILURE(status)) { 187 return s; 188 } 189 // Check if string is already normalized 190 UNormalizationCheckResult result = nfcNormalizer->quickCheck(s, status); 191 // If so, return it 192 if (U_SUCCESS(status) && result == UNORM_YES) { 193 return s; 194 } 195 // Otherwise, normalize it 196 UnicodeString normalized = nfcNormalizer->normalize(s, status); 197 if (U_FAILURE(status)) { 198 return {}; 199 } 200 return normalized; 201 } 202 203 // Converts `s` to a double, indicating failure via `errorCode` 204 static void strToDouble(const UnicodeString& s, double& result, UErrorCode& errorCode) { 205 CHECK_ERROR(errorCode); 206 207 // Using en-US locale because it happens to correspond to the spec: 208 // https://github.com/unicode-org/message-format-wg/blob/main/spec/registry.md#number-operands 209 // Ideally, this should re-use the code for parsing number literals (Parser::parseUnquotedLiteral()) 210 // It's hard to reuse the same code because of how parse errors work. 211 // TODO: Refactor 212 LocalPointer<NumberFormat> numberFormat(NumberFormat::createInstance(Locale("en-US"), errorCode)); 213 CHECK_ERROR(errorCode); 214 icu::Formattable asNumber; 215 numberFormat->parse(s, asNumber, errorCode); 216 CHECK_ERROR(errorCode); 217 result = asNumber.getDouble(errorCode); 218 } 219 220 static double tryStringAsNumber(const Locale& locale, const Formattable& val, UErrorCode& errorCode) { 221 // Check for a string option, try to parse it as a number if present 222 UnicodeString tempString = val.getString(errorCode); 223 LocalPointer<NumberFormat> numberFormat(NumberFormat::createInstance(locale, errorCode)); 224 if (U_SUCCESS(errorCode)) { 225 icu::Formattable asNumber; 226 numberFormat->parse(tempString, asNumber, errorCode); 227 if (U_SUCCESS(errorCode)) { 228 return asNumber.getDouble(errorCode); 229 } 230 } 231 return 0; 232 } 233 234 static int64_t getInt64Value(const Locale& locale, const Formattable& value, UErrorCode& errorCode) { 235 if (U_SUCCESS(errorCode)) { 236 if (!value.isNumeric()) { 237 double doubleResult = tryStringAsNumber(locale, value, errorCode); 238 if (U_SUCCESS(errorCode)) { 239 return static_cast<int64_t>(doubleResult); 240 } 241 } 242 else { 243 int64_t result = value.getInt64(errorCode); 244 if (U_SUCCESS(errorCode)) { 245 return result; 246 } 247 } 248 } 249 // Option was numeric but couldn't be converted to int64_t -- could be overflow 250 return 0; 251 } 252 253 // Adopts its arguments 254 MFFunctionRegistry::MFFunctionRegistry(FormatterMap* f, SelectorMap* s, Hashtable* byType) : formatters(f), selectors(s), formattersByType(byType) { 255 U_ASSERT(f != nullptr && s != nullptr && byType != nullptr); 256 } 257 258 MFFunctionRegistry& MFFunctionRegistry::operator=(MFFunctionRegistry&& other) noexcept { 259 cleanup(); 260 261 formatters = other.formatters; 262 selectors = other.selectors; 263 formattersByType = other.formattersByType; 264 other.formatters = nullptr; 265 other.selectors = nullptr; 266 other.formattersByType = nullptr; 267 268 return *this; 269 } 270 271 void MFFunctionRegistry::cleanup() noexcept { 272 if (formatters != nullptr) { 273 delete formatters; 274 } 275 if (selectors != nullptr) { 276 delete selectors; 277 } 278 if (formattersByType != nullptr) { 279 delete formattersByType; 280 } 281 } 282 283 284 MFFunctionRegistry::~MFFunctionRegistry() { 285 cleanup(); 286 } 287 288 // Specific formatter implementations 289 290 // --------- Number 291 292 bool inBounds(const UnicodeString& s, int32_t i) { 293 return i < s.length(); 294 } 295 296 bool isDigit(UChar32 c) { 297 return c >= '0' && c <= '9'; 298 } 299 300 bool parseDigits(const UnicodeString& s, int32_t& i) { 301 if (!isDigit(s[i])) { 302 return false; 303 } 304 while (inBounds(s, i) && isDigit(s[i])) { 305 i++; 306 } 307 return true; 308 } 309 310 // number-literal = ["-"] (%x30 / (%x31-39 *DIGIT)) ["." 1*DIGIT] [%i"e" ["-" / "+"] 1*DIGIT] 311 bool validateNumberLiteral(const UnicodeString& s) { 312 int32_t i = 0; 313 314 if (s.isEmpty()) { 315 return false; 316 } 317 318 // Parse optional sign 319 // ["-"] 320 if (s[0] == HYPHEN) { 321 i++; 322 } 323 324 if (!inBounds(s, i)) { 325 return false; 326 } 327 328 // Parse integer digits 329 // (%x30 / (%x31-39 *DIGIT)) 330 if (s[i] == '0') { 331 if (!inBounds(s, i + 1) || s[i + 1] != PERIOD) { 332 return false; 333 } 334 i++; 335 } else { 336 if (!parseDigits(s, i)) { 337 return false; 338 } 339 } 340 // The rest is optional 341 if (!inBounds(s, i)) { 342 return true; 343 } 344 345 // Parse optional decimal digits 346 // ["." 1*DIGIT] 347 if (s[i] == PERIOD) { 348 i++; 349 if (!parseDigits(s, i)) { 350 return false; 351 } 352 } 353 354 if (!inBounds(s, i)) { 355 return true; 356 } 357 358 // Parse optional exponent 359 // [%i"e" ["-" / "+"] 1*DIGIT] 360 if (s[i] == 'e' || s[i] == 'E') { 361 i++; 362 if (!inBounds(s, i)) { 363 return false; 364 } 365 // Parse optional sign 366 if (s[i] == HYPHEN || s[i] == PLUS) { 367 i++; 368 } 369 if (!inBounds(s, i)) { 370 return false; 371 } 372 if (!parseDigits(s, i)) { 373 return false; 374 } 375 } 376 if (i != s.length()) { 377 return false; 378 } 379 return true; 380 } 381 382 bool isInteger(const Formattable& s) { 383 switch (s.getType()) { 384 case UFMT_DOUBLE: 385 case UFMT_LONG: 386 case UFMT_INT64: 387 return true; 388 case UFMT_STRING: { 389 UErrorCode ignore = U_ZERO_ERROR; 390 const UnicodeString& str = s.getString(ignore); 391 return validateNumberLiteral(str); 392 } 393 default: 394 return false; 395 } 396 } 397 398 bool isDigitSizeOption(const UnicodeString& s) { 399 return s == UnicodeString("minimumIntegerDigits") 400 || s == UnicodeString("minimumFractionDigits") 401 || s == UnicodeString("maximumFractionDigits") 402 || s == UnicodeString("minimumSignificantDigits") 403 || s == UnicodeString("maximumSignificantDigits"); 404 } 405 406 /* static */ void StandardFunctions::validateDigitSizeOptions(const FunctionOptions& opts, 407 UErrorCode& status) { 408 CHECK_ERROR(status); 409 410 for (int32_t i = 0; i < opts.optionsCount(); i++) { 411 const ResolvedFunctionOption& opt = opts.options[i]; 412 if (isDigitSizeOption(opt.getName()) && !isInteger(opt.getValue())) { 413 status = U_MF_BAD_OPTION; 414 return; 415 } 416 } 417 } 418 419 /* static */ number::LocalizedNumberFormatter StandardFunctions::formatterForOptions(const Number& number, 420 const FunctionOptions& opts, 421 UErrorCode& status) { 422 number::UnlocalizedNumberFormatter nf; 423 424 using namespace number; 425 426 validateDigitSizeOptions(opts, status); 427 if (U_FAILURE(status)) { 428 return {}; 429 } 430 431 if (U_SUCCESS(status)) { 432 Formattable opt; 433 nf = NumberFormatter::with(); 434 bool isInteger = number.isInteger; 435 436 if (isInteger) { 437 nf = nf.precision(Precision::integer()); 438 } 439 440 // Notation options 441 if (!isInteger) { 442 // These options only apply to `:number` 443 444 // Default notation is simple 445 Notation notation = Notation::simple(); 446 UnicodeString notationOpt = opts.getStringFunctionOption(options::NOTATION); 447 if (notationOpt == options::SCIENTIFIC) { 448 notation = Notation::scientific(); 449 } else if (notationOpt == options::ENGINEERING) { 450 notation = Notation::engineering(); 451 } else if (notationOpt == options::COMPACT) { 452 UnicodeString displayOpt = opts.getStringFunctionOption(options::COMPACT_DISPLAY); 453 if (displayOpt == options::LONG) { 454 notation = Notation::compactLong(); 455 } else { 456 // Default is short 457 notation = Notation::compactShort(); 458 } 459 } else { 460 // Already set to default 461 } 462 nf = nf.notation(notation); 463 } 464 465 // Style options -- specific to `:number` 466 if (!isInteger) { 467 if (number.usePercent(opts)) { 468 nf = nf.unit(NoUnit::percent()).scale(Scale::powerOfTen(2)); 469 } 470 } 471 472 int32_t maxSignificantDigits = number.maximumSignificantDigits(opts); 473 if (!isInteger) { 474 int32_t minFractionDigits = number.minimumFractionDigits(opts); 475 int32_t maxFractionDigits = number.maximumFractionDigits(opts); 476 int32_t minSignificantDigits = number.minimumSignificantDigits(opts); 477 Precision p = Precision::unlimited(); 478 bool precisionOptions = false; 479 480 // Returning -1 means the option wasn't provided 481 if (maxFractionDigits != -1 && minFractionDigits != -1) { 482 precisionOptions = true; 483 p = Precision::minMaxFraction(minFractionDigits, maxFractionDigits); 484 } else if (minFractionDigits != -1) { 485 precisionOptions = true; 486 p = Precision::minFraction(minFractionDigits); 487 } else if (maxFractionDigits != -1) { 488 precisionOptions = true; 489 p = Precision::maxFraction(maxFractionDigits); 490 } 491 492 if (minSignificantDigits != -1) { 493 precisionOptions = true; 494 p = p.minSignificantDigits(minSignificantDigits); 495 } 496 if (maxSignificantDigits != -1) { 497 precisionOptions = true; 498 p = p.maxSignificantDigits(maxSignificantDigits); 499 } 500 if (precisionOptions) { 501 nf = nf.precision(p); 502 } 503 } else { 504 // maxSignificantDigits applies to `:integer`, but the other precision options don't 505 Precision p = Precision::integer(); 506 if (maxSignificantDigits != -1) { 507 p = p.maxSignificantDigits(maxSignificantDigits); 508 } 509 nf = nf.precision(p); 510 } 511 512 // All other options apply to both `:number` and `:integer` 513 int32_t minIntegerDigits = number.minimumIntegerDigits(opts); 514 nf = nf.integerWidth(IntegerWidth::zeroFillTo(minIntegerDigits)); 515 516 // signDisplay 517 UnicodeString sd = opts.getStringFunctionOption(options::SIGN_DISPLAY); 518 UNumberSignDisplay signDisplay; 519 if (sd == options::ALWAYS) { 520 signDisplay = UNumberSignDisplay::UNUM_SIGN_ALWAYS; 521 } else if (sd == options::EXCEPT_ZERO) { 522 signDisplay = UNumberSignDisplay::UNUM_SIGN_EXCEPT_ZERO; 523 } else if (sd == options::NEGATIVE) { 524 signDisplay = UNumberSignDisplay::UNUM_SIGN_NEGATIVE; 525 } else if (sd == options::NEVER) { 526 signDisplay = UNumberSignDisplay::UNUM_SIGN_NEVER; 527 } else { 528 signDisplay = UNumberSignDisplay::UNUM_SIGN_AUTO; 529 } 530 nf = nf.sign(signDisplay); 531 532 // useGrouping 533 UnicodeString ug = opts.getStringFunctionOption(options::USE_GROUPING); 534 UNumberGroupingStrategy grp; 535 if (ug == options::ALWAYS) { 536 grp = UNumberGroupingStrategy::UNUM_GROUPING_ON_ALIGNED; 537 } else if (ug == options::NEVER) { 538 grp = UNumberGroupingStrategy::UNUM_GROUPING_OFF; 539 } else if (ug == options::MIN2) { 540 grp = UNumberGroupingStrategy::UNUM_GROUPING_MIN2; 541 } else { 542 // Default is "auto" 543 grp = UNumberGroupingStrategy::UNUM_GROUPING_AUTO; 544 } 545 nf = nf.grouping(grp); 546 547 // numberingSystem 548 UnicodeString ns = opts.getStringFunctionOption(options::NUMBERING_SYSTEM); 549 if (ns.length() > 0) { 550 ns = ns.toLower(Locale("en-US")); 551 CharString buffer; 552 // Ignore bad option values, so use a local status 553 UErrorCode localStatus = U_ZERO_ERROR; 554 // Copied from number_skeletons.cpp (helpers::parseNumberingSystemOption) 555 buffer.appendInvariantChars({false, ns.getBuffer(), ns.length()}, localStatus); 556 if (U_SUCCESS(localStatus)) { 557 LocalPointer<NumberingSystem> symbols 558 (NumberingSystem::createInstanceByName(buffer.data(), localStatus)); 559 if (U_SUCCESS(localStatus)) { 560 nf = nf.adoptSymbols(symbols.orphan()); 561 } 562 } 563 } 564 } 565 return nf.locale(number.locale); 566 } 567 568 Formatter* StandardFunctions::NumberFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { 569 NULL_ON_ERROR(errorCode); 570 571 Formatter* result = new Number(locale); 572 if (result == nullptr) { 573 errorCode = U_MEMORY_ALLOCATION_ERROR; 574 } 575 return result; 576 } 577 578 Formatter* StandardFunctions::IntegerFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { 579 NULL_ON_ERROR(errorCode); 580 581 Formatter* result = new Number(Number::integer(locale)); 582 if (result == nullptr) { 583 errorCode = U_MEMORY_ALLOCATION_ERROR; 584 } 585 return result; 586 } 587 588 StandardFunctions::IntegerFactory::~IntegerFactory() {} 589 590 static FormattedPlaceholder notANumber(const FormattedPlaceholder& input) { 591 return FormattedPlaceholder(input, FormattedValue(UnicodeString("NaN"))); 592 } 593 594 static double parseNumberLiteral(const Formattable& input, UErrorCode& errorCode) { 595 if (U_FAILURE(errorCode)) { 596 return {}; 597 } 598 599 // Copying string to avoid GCC dangling-reference warning 600 // (although the reference is safe) 601 UnicodeString inputStr = input.getString(errorCode); 602 // Precondition: `input`'s source Formattable has type string 603 if (U_FAILURE(errorCode)) { 604 return {}; 605 } 606 607 // Validate string according to `number-literal` production 608 // in the spec for `:number`. This is because some cases are 609 // forbidden by this grammar, but allowed by StringToDouble. 610 if (!validateNumberLiteral(inputStr)) { 611 errorCode = U_MF_OPERAND_MISMATCH_ERROR; 612 return 0; 613 } 614 615 // Convert to double using double_conversion::StringToDoubleConverter 616 using namespace double_conversion; 617 int processedCharactersCount = 0; 618 StringToDoubleConverter converter(0, 0, 0, "", ""); 619 int32_t len = inputStr.length(); 620 double result = 621 converter.StringToDouble(reinterpret_cast<const uint16_t*>(inputStr.getBuffer()), 622 len, 623 &processedCharactersCount); 624 if (processedCharactersCount != len) { 625 errorCode = U_MF_OPERAND_MISMATCH_ERROR; 626 } 627 return result; 628 } 629 630 static UChar32 digitToChar(int32_t val, UErrorCode errorCode) { 631 if (U_FAILURE(errorCode)) { 632 return '0'; 633 } 634 if (val < 0 || val > 9) { 635 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 636 } 637 switch(val) { 638 case 0: 639 return '0'; 640 case 1: 641 return '1'; 642 case 2: 643 return '2'; 644 case 3: 645 return '3'; 646 case 4: 647 return '4'; 648 case 5: 649 return '5'; 650 case 6: 651 return '6'; 652 case 7: 653 return '7'; 654 case 8: 655 return '8'; 656 case 9: 657 return '9'; 658 default: 659 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 660 return '0'; 661 } 662 } 663 664 int32_t StandardFunctions::Number::maximumFractionDigits(const FunctionOptions& opts) const { 665 Formattable opt; 666 667 if (isInteger) { 668 return 0; 669 } 670 671 if (opts.getFunctionOption(options::MAXIMUM_FRACTION_DIGITS, opt)) { 672 UErrorCode localErrorCode = U_ZERO_ERROR; 673 int64_t val = getInt64Value(locale, opt, localErrorCode); 674 if (U_SUCCESS(localErrorCode)) { 675 return static_cast<int32_t>(val); 676 } 677 } 678 // Returning -1 indicates that the option wasn't provided or was a non-integer. 679 // The caller needs to check for that case, since passing -1 to Precision::maxFraction() 680 // is an error. 681 return -1; 682 } 683 684 int32_t StandardFunctions::Number::minimumFractionDigits(const FunctionOptions& opts) const { 685 Formattable opt; 686 687 if (!isInteger) { 688 if (opts.getFunctionOption(options::MINIMUM_FRACTION_DIGITS, opt)) { 689 UErrorCode localErrorCode = U_ZERO_ERROR; 690 int64_t val = getInt64Value(locale, opt, localErrorCode); 691 if (U_SUCCESS(localErrorCode)) { 692 return static_cast<int32_t>(val); 693 } 694 } 695 } 696 // Returning -1 indicates that the option wasn't provided or was a non-integer. 697 // The caller needs to check for that case, since passing -1 to Precision::minFraction() 698 // is an error. 699 return -1; 700 } 701 702 int32_t StandardFunctions::Number::minimumIntegerDigits(const FunctionOptions& opts) const { 703 Formattable opt; 704 705 if (opts.getFunctionOption(options::MINIMUM_INTEGER_DIGITS, opt)) { 706 UErrorCode localErrorCode = U_ZERO_ERROR; 707 int64_t val = getInt64Value(locale, opt, localErrorCode); 708 if (U_SUCCESS(localErrorCode)) { 709 return static_cast<int32_t>(val); 710 } 711 } 712 return 1; 713 } 714 715 int32_t StandardFunctions::Number::minimumSignificantDigits(const FunctionOptions& opts) const { 716 Formattable opt; 717 718 if (!isInteger) { 719 if (opts.getFunctionOption(options::MINIMUM_SIGNIFICANT_DIGITS, opt)) { 720 UErrorCode localErrorCode = U_ZERO_ERROR; 721 int64_t val = getInt64Value(locale, opt, localErrorCode); 722 if (U_SUCCESS(localErrorCode)) { 723 return static_cast<int32_t>(val); 724 } 725 } 726 } 727 // Returning -1 indicates that the option wasn't provided or was a non-integer. 728 // The caller needs to check for that case, since passing -1 to Precision::minSignificantDigits() 729 // is an error. 730 return -1; 731 } 732 733 int32_t StandardFunctions::Number::maximumSignificantDigits(const FunctionOptions& opts) const { 734 Formattable opt; 735 736 if (opts.getFunctionOption(options::MAXIMUM_SIGNIFICANT_DIGITS, opt)) { 737 UErrorCode localErrorCode = U_ZERO_ERROR; 738 int64_t val = getInt64Value(locale, opt, localErrorCode); 739 if (U_SUCCESS(localErrorCode)) { 740 return static_cast<int32_t>(val); 741 } 742 } 743 // Returning -1 indicates that the option wasn't provided or was a non-integer. 744 // The caller needs to check for that case, since passing -1 to Precision::maxSignificantDigits() 745 // is an error. 746 return -1; // Not a valid value for Precision; has to be checked 747 } 748 749 bool StandardFunctions::Number::usePercent(const FunctionOptions& opts) const { 750 Formattable opt; 751 if (isInteger 752 || !opts.getFunctionOption(options::STYLE, opt) 753 || opt.getType() != UFMT_STRING) { 754 return false; 755 } 756 UErrorCode localErrorCode = U_ZERO_ERROR; 757 const UnicodeString& style = opt.getString(localErrorCode); 758 U_ASSERT(U_SUCCESS(localErrorCode)); 759 return (style == options::PERCENT_STRING); 760 } 761 762 /* static */ StandardFunctions::Number StandardFunctions::Number::integer(const Locale& loc) { 763 return StandardFunctions::Number(loc, true); 764 } 765 766 FormattedPlaceholder StandardFunctions::Number::format(FormattedPlaceholder&& arg, FunctionOptions&& opts, UErrorCode& errorCode) const { 767 if (U_FAILURE(errorCode)) { 768 return {}; 769 } 770 771 // No argument => return "NaN" 772 if (!arg.canFormat()) { 773 errorCode = U_MF_OPERAND_MISMATCH_ERROR; 774 return notANumber(arg); 775 } 776 777 number::LocalizedNumberFormatter realFormatter; 778 realFormatter = formatterForOptions(*this, opts, errorCode); 779 780 number::FormattedNumber numberResult; 781 int64_t integerValue = 0; 782 783 if (U_SUCCESS(errorCode)) { 784 // Already checked that contents can be formatted 785 const Formattable& toFormat = arg.asFormattable(); 786 switch (toFormat.getType()) { 787 case UFMT_DOUBLE: { 788 double d = toFormat.getDouble(errorCode); 789 U_ASSERT(U_SUCCESS(errorCode)); 790 numberResult = realFormatter.formatDouble(d, errorCode); 791 integerValue = static_cast<int64_t>(std::round(d)); 792 break; 793 } 794 case UFMT_LONG: { 795 int32_t l = toFormat.getLong(errorCode); 796 U_ASSERT(U_SUCCESS(errorCode)); 797 numberResult = realFormatter.formatInt(l, errorCode); 798 integerValue = l; 799 break; 800 } 801 case UFMT_INT64: { 802 int64_t i = toFormat.getInt64(errorCode); 803 U_ASSERT(U_SUCCESS(errorCode)); 804 numberResult = realFormatter.formatInt(i, errorCode); 805 integerValue = i; 806 break; 807 } 808 case UFMT_STRING: { 809 // Try to parse the string as a number 810 double d = parseNumberLiteral(toFormat, errorCode); 811 if (U_FAILURE(errorCode)) 812 return {}; 813 numberResult = realFormatter.formatDouble(d, errorCode); 814 integerValue = static_cast<int64_t>(std::round(d)); 815 break; 816 } 817 default: { 818 // Other types can't be parsed as a number 819 errorCode = U_MF_OPERAND_MISMATCH_ERROR; 820 return notANumber(arg); 821 } 822 } 823 } 824 825 // Need to return the integer value if invoked as :integer 826 if (isInteger) { 827 return FormattedPlaceholder(FormattedPlaceholder(Formattable(integerValue), arg.getFallback()), 828 std::move(opts), 829 FormattedValue(std::move(numberResult))); 830 } 831 return FormattedPlaceholder(arg, std::move(opts), FormattedValue(std::move(numberResult))); 832 } 833 834 StandardFunctions::Number::~Number() {} 835 StandardFunctions::NumberFactory::~NumberFactory() {} 836 837 // --------- PluralFactory 838 839 StandardFunctions::Plural::PluralType StandardFunctions::Plural::pluralType(const FunctionOptions& opts) const { 840 Formattable opt; 841 842 if (opts.getFunctionOption(options::SELECT, opt)) { 843 UErrorCode localErrorCode = U_ZERO_ERROR; 844 UnicodeString val = opt.getString(localErrorCode); 845 if (U_SUCCESS(localErrorCode)) { 846 if (val == options::ORDINAL) { 847 return PluralType::PLURAL_ORDINAL; 848 } 849 if (val == options::EXACT) { 850 return PluralType::PLURAL_EXACT; 851 } 852 } 853 } 854 return PluralType::PLURAL_CARDINAL; 855 } 856 857 Selector* StandardFunctions::PluralFactory::createSelector(const Locale& locale, UErrorCode& errorCode) const { 858 NULL_ON_ERROR(errorCode); 859 860 Selector* result; 861 if (isInteger) { 862 result = new Plural(Plural::integer(locale, errorCode)); 863 } else { 864 result = new Plural(locale, errorCode); 865 } 866 NULL_ON_ERROR(errorCode); 867 if (result == nullptr) { 868 errorCode = U_MEMORY_ALLOCATION_ERROR; 869 } 870 return result; 871 } 872 873 void StandardFunctions::Plural::selectKey(FormattedPlaceholder&& toFormat, 874 FunctionOptions&& opts, 875 const UnicodeString* keys, 876 int32_t keysLen, 877 UnicodeString* prefs, 878 int32_t& prefsLen, 879 UErrorCode& errorCode) const { 880 CHECK_ERROR(errorCode); 881 882 // No argument => return "NaN" 883 if (!toFormat.canFormat()) { 884 errorCode = U_MF_SELECTOR_ERROR; 885 return; 886 } 887 888 // Handle any formatting options 889 PluralType type = pluralType(opts); 890 FormattedPlaceholder resolvedSelector = numberFormatter->format(std::move(toFormat), 891 std::move(opts), 892 errorCode); 893 CHECK_ERROR(errorCode); 894 895 U_ASSERT(resolvedSelector.isEvaluated() && resolvedSelector.output().isNumber()); 896 897 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/registry.md#number-selection 898 // 1. Let exact be the JSON string representation of the numeric value of resolvedSelector 899 const number::FormattedNumber& formattedNumber = resolvedSelector.output().getNumber(); 900 UnicodeString exact = formattedNumber.toString(errorCode); 901 902 if (U_FAILURE(errorCode)) { 903 // Non-number => selector error 904 errorCode = U_MF_SELECTOR_ERROR; 905 return; 906 } 907 908 // Step 2. Let keyword be a string which is the result of rule selection on resolvedSelector. 909 // If the option select is set to exact, rule-based selection is not used. Return the empty string. 910 UnicodeString keyword; 911 if (type != PluralType::PLURAL_EXACT) { 912 UPluralType t = type == PluralType::PLURAL_ORDINAL ? UPLURAL_TYPE_ORDINAL : UPLURAL_TYPE_CARDINAL; 913 // Look up plural rules by locale and type 914 LocalPointer<PluralRules> rules(PluralRules::forLocale(locale, t, errorCode)); 915 CHECK_ERROR(errorCode); 916 917 keyword = rules->select(formattedNumber, errorCode); 918 } 919 920 // Steps 3-4 elided: 921 // 3. Let resultExact be a new empty list of strings. 922 // 4. Let resultKeyword be a new empty list of strings. 923 // Instead, we use `prefs` the concatenation of `resultExact` 924 // and `resultKeyword`. 925 926 prefsLen = 0; 927 928 // 5. For each string key in keys: 929 double keyAsDouble = 0; 930 for (int32_t i = 0; i < keysLen; i++) { 931 // Try parsing the key as a double 932 UErrorCode localErrorCode = U_ZERO_ERROR; 933 strToDouble(keys[i], keyAsDouble, localErrorCode); 934 // 5i. If the value of key matches the production number-literal, then 935 if (U_SUCCESS(localErrorCode)) { 936 // 5i(a). If key and exact consist of the same sequence of Unicode code points, then 937 if (exact == keys[i]) { 938 // 5i(a)(a) Append key as the last element of the list resultExact. 939 prefs[prefsLen] = keys[i]; 940 prefsLen++; 941 break; 942 } 943 } 944 } 945 946 // Return immediately if exact matching was requested 947 if (prefsLen == keysLen || type == PluralType::PLURAL_EXACT) { 948 return; 949 } 950 951 952 for (int32_t i = 0; i < keysLen; i ++) { 953 if (prefsLen >= keysLen) { 954 break; 955 } 956 // 5ii. Else if key is one of the keywords zero, one, two, few, many, or other, then 957 // 5ii(a). If key and keyword consist of the same sequence of Unicode code points, then 958 if (keyword == keys[i]) { 959 // 5ii(a)(a) Append key as the last element of the list resultKeyword. 960 prefs[prefsLen] = keys[i]; 961 prefsLen++; 962 } 963 } 964 965 // Note: Step 5(iii) "Else, emit a Selection Error" is omitted in both loops 966 967 // 6. Return a new list whose elements are the concatenation of the elements 968 // (in order) of resultExact followed by the elements (in order) of resultKeyword. 969 // (Implicit, since `prefs` is an out-parameter) 970 } 971 972 StandardFunctions::Plural::Plural(const Locale& loc, UErrorCode& status) : locale(loc) { 973 CHECK_ERROR(status); 974 975 numberFormatter.adoptInstead(new StandardFunctions::Number(loc)); 976 if (!numberFormatter.isValid()) { 977 status = U_MEMORY_ALLOCATION_ERROR; 978 } 979 } 980 981 StandardFunctions::Plural::Plural(const Locale& loc, bool isInt, UErrorCode& status) : locale(loc), isInteger(isInt) { 982 CHECK_ERROR(status); 983 984 if (isInteger) { 985 numberFormatter.adoptInstead(new StandardFunctions::Number(loc, true)); 986 } else { 987 numberFormatter.adoptInstead(new StandardFunctions::Number(loc)); 988 } 989 990 if (!numberFormatter.isValid()) { 991 status = U_MEMORY_ALLOCATION_ERROR; 992 } 993 } 994 995 StandardFunctions::Plural::~Plural() {} 996 997 StandardFunctions::PluralFactory::~PluralFactory() {} 998 999 // --------- DateTimeFactory 1000 1001 /* static */ UnicodeString StandardFunctions::getStringOption(const FunctionOptions& opts, 1002 std::u16string_view optionName, 1003 UErrorCode& errorCode) { 1004 if (U_SUCCESS(errorCode)) { 1005 Formattable opt; 1006 if (opts.getFunctionOption(optionName, opt)) { 1007 return opt.getString(errorCode); // In case it's not a string, error code will be set 1008 } else { 1009 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 1010 } 1011 } 1012 // Default is empty string 1013 return {}; 1014 } 1015 1016 // Date/time options only 1017 static UnicodeString defaultForOption(std::u16string_view optionName) { 1018 if (optionName == options::DATE_STYLE 1019 || optionName == options::TIME_STYLE 1020 || optionName == options::STYLE) { 1021 return UnicodeString(options::SHORT); 1022 } 1023 return {}; // Empty string is default 1024 } 1025 1026 // TODO 1027 // Only DateTime currently uses the function options stored in the placeholder. 1028 // It also doesn't use them very consistently (it looks at the previous set of options, 1029 // and others aren't preserved). This needs to be generalized, 1030 // but that depends on https://github.com/unicode-org/message-format-wg/issues/515 1031 // Finally, the option value is assumed to be a string, 1032 // which works for datetime options but not necessarily in general. 1033 UnicodeString StandardFunctions::DateTime::getFunctionOption(const FormattedPlaceholder& toFormat, 1034 const FunctionOptions& opts, 1035 std::u16string_view optionName) const { 1036 // Options passed to the current function invocation take priority 1037 Formattable opt; 1038 UnicodeString s; 1039 UErrorCode localErrorCode = U_ZERO_ERROR; 1040 s = getStringOption(opts, optionName, localErrorCode); 1041 if (U_SUCCESS(localErrorCode)) { 1042 return s; 1043 } 1044 // Next try the set of options used to construct `toFormat` 1045 localErrorCode = U_ZERO_ERROR; 1046 s = getStringOption(toFormat.options(), optionName, localErrorCode); 1047 if (U_SUCCESS(localErrorCode)) { 1048 return s; 1049 } 1050 // Finally, use default 1051 return defaultForOption(optionName); 1052 } 1053 1054 // Used for options that don't have defaults 1055 UnicodeString StandardFunctions::DateTime::getFunctionOption(const FormattedPlaceholder& toFormat, 1056 const FunctionOptions& opts, 1057 std::u16string_view optionName, 1058 UErrorCode& errorCode) const { 1059 if (U_SUCCESS(errorCode)) { 1060 // Options passed to the current function invocation take priority 1061 Formattable opt; 1062 UnicodeString s; 1063 UErrorCode localErrorCode = U_ZERO_ERROR; 1064 s = getStringOption(opts, optionName, localErrorCode); 1065 if (U_SUCCESS(localErrorCode)) { 1066 return s; 1067 } 1068 // Next try the set of options used to construct `toFormat` 1069 localErrorCode = U_ZERO_ERROR; 1070 s = getStringOption(toFormat.options(), optionName, localErrorCode); 1071 if (U_SUCCESS(localErrorCode)) { 1072 return s; 1073 } 1074 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 1075 } 1076 return {}; 1077 } 1078 1079 static DateFormat::EStyle stringToStyle(UnicodeString option, UErrorCode& errorCode) { 1080 if (U_SUCCESS(errorCode)) { 1081 UnicodeString upper = option.toUpper(); 1082 if (upper == options::FULL_UPPER) { 1083 return DateFormat::EStyle::kFull; 1084 } 1085 if (upper == options::LONG_UPPER) { 1086 return DateFormat::EStyle::kLong; 1087 } 1088 if (upper == options::MEDIUM_UPPER) { 1089 return DateFormat::EStyle::kMedium; 1090 } 1091 if (upper == options::SHORT_UPPER) { 1092 return DateFormat::EStyle::kShort; 1093 } 1094 if (upper.isEmpty() || upper == options::DEFAULT_UPPER) { 1095 return DateFormat::EStyle::kDefault; 1096 } 1097 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 1098 } 1099 return DateFormat::EStyle::kNone; 1100 } 1101 1102 /* static */ StandardFunctions::DateTimeFactory* StandardFunctions::DateTimeFactory::dateTime(UErrorCode& errorCode) { 1103 NULL_ON_ERROR(errorCode); 1104 1105 DateTimeFactory* result = new StandardFunctions::DateTimeFactory(DateTimeType::DateTime); 1106 if (result == nullptr) { 1107 errorCode = U_MEMORY_ALLOCATION_ERROR; 1108 } 1109 return result; 1110 } 1111 1112 /* static */ StandardFunctions::DateTimeFactory* StandardFunctions::DateTimeFactory::date(UErrorCode& errorCode) { 1113 NULL_ON_ERROR(errorCode); 1114 1115 DateTimeFactory* result = new DateTimeFactory(DateTimeType::Date); 1116 if (result == nullptr) { 1117 errorCode = U_MEMORY_ALLOCATION_ERROR; 1118 } 1119 return result; 1120 } 1121 1122 /* static */ StandardFunctions::DateTimeFactory* StandardFunctions::DateTimeFactory::time(UErrorCode& errorCode) { 1123 NULL_ON_ERROR(errorCode); 1124 1125 DateTimeFactory* result = new DateTimeFactory(DateTimeType::Time); 1126 if (result == nullptr) { 1127 errorCode = U_MEMORY_ALLOCATION_ERROR; 1128 } 1129 return result; 1130 } 1131 1132 Formatter* StandardFunctions::DateTimeFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { 1133 NULL_ON_ERROR(errorCode); 1134 1135 Formatter* result = new StandardFunctions::DateTime(locale, type); 1136 if (result == nullptr) { 1137 errorCode = U_MEMORY_ALLOCATION_ERROR; 1138 } 1139 return result; 1140 } 1141 1142 // DateFormat parsers that are shared across threads 1143 static DateFormat* dateParser = nullptr; 1144 static DateFormat* dateTimeParser = nullptr; 1145 static DateFormat* dateTimeUTCParser = nullptr; 1146 static DateFormat* dateTimeZoneParser = nullptr; 1147 static icu::UInitOnce gMF2DateParsersInitOnce {}; 1148 1149 // Clean up shared DateFormat objects 1150 static UBool mf2_date_parsers_cleanup() { 1151 if (dateParser != nullptr) { 1152 delete dateParser; 1153 dateParser = nullptr; 1154 } 1155 if (dateTimeParser != nullptr) { 1156 delete dateTimeParser; 1157 dateTimeParser = nullptr; 1158 } 1159 if (dateTimeUTCParser != nullptr) { 1160 delete dateTimeUTCParser; 1161 dateTimeUTCParser = nullptr; 1162 } 1163 if (dateTimeZoneParser != nullptr) { 1164 delete dateTimeZoneParser; 1165 dateTimeZoneParser = nullptr; 1166 } 1167 return true; 1168 } 1169 1170 // Initialize DateFormat objects used for parsing date literals 1171 static void initDateParsersOnce(UErrorCode& errorCode) { 1172 U_ASSERT(dateParser == nullptr); 1173 U_ASSERT(dateTimeParser == nullptr); 1174 U_ASSERT(dateTimeUTCParser == nullptr); 1175 U_ASSERT(dateTimeZoneParser == nullptr); 1176 1177 // Handles ISO 8601 date 1178 dateParser = new SimpleDateFormat(UnicodeString("YYYY-MM-dd"), errorCode); 1179 // Handles ISO 8601 datetime without time zone 1180 dateTimeParser = new SimpleDateFormat(UnicodeString("YYYY-MM-dd'T'HH:mm:ss"), errorCode); 1181 // Handles ISO 8601 datetime with 'Z' to denote UTC 1182 dateTimeUTCParser = new SimpleDateFormat(UnicodeString("YYYY-MM-dd'T'HH:mm:ssZ"), errorCode); 1183 // Handles ISO 8601 datetime with timezone offset; 'zzzz' denotes timezone offset 1184 dateTimeZoneParser = new SimpleDateFormat(UnicodeString("YYYY-MM-dd'T'HH:mm:sszzzz"), errorCode); 1185 1186 if (!dateParser || !dateTimeParser || !dateTimeUTCParser || !dateTimeZoneParser) { 1187 errorCode = U_MEMORY_ALLOCATION_ERROR; 1188 mf2_date_parsers_cleanup(); 1189 return; 1190 } 1191 ucln_i18n_registerCleanup(UCLN_I18N_MF2_DATE_PARSERS, mf2_date_parsers_cleanup); 1192 } 1193 1194 // Lazily initialize DateFormat objects used for parsing date literals 1195 static void initDateParsers(UErrorCode& errorCode) { 1196 CHECK_ERROR(errorCode); 1197 1198 umtx_initOnce(gMF2DateParsersInitOnce, &initDateParsersOnce, errorCode); 1199 } 1200 1201 // From https://github.com/unicode-org/message-format-wg/blob/main/spec/registry.md#date-and-time-operands : 1202 // "A date/time literal value is a non-empty string consisting of an ISO 8601 date, or 1203 // an ISO 8601 datetime optionally followed by a timezone offset." 1204 UDate StandardFunctions::DateTime::tryPatterns(const UnicodeString& sourceStr, 1205 UErrorCode& errorCode) const { 1206 if (U_FAILURE(errorCode)) { 1207 return 0; 1208 } 1209 // Handle ISO 8601 datetime (tryTimeZonePatterns() handles the case 1210 // where a timezone offset follows) 1211 if (sourceStr.length() > 10) { 1212 return dateTimeParser->parse(sourceStr, errorCode); 1213 } 1214 // Handle ISO 8601 date 1215 return dateParser->parse(sourceStr, errorCode); 1216 } 1217 1218 // See comment on tryPatterns() for spec reference 1219 UDate StandardFunctions::DateTime::tryTimeZonePatterns(const UnicodeString& sourceStr, 1220 UErrorCode& errorCode) const { 1221 if (U_FAILURE(errorCode)) { 1222 return 0; 1223 } 1224 int32_t len = sourceStr.length(); 1225 if (len > 0 && sourceStr[len] == 'Z') { 1226 return dateTimeUTCParser->parse(sourceStr, errorCode); 1227 } 1228 return dateTimeZoneParser->parse(sourceStr, errorCode); 1229 } 1230 1231 static TimeZone* createTimeZone(const DateInfo& dateInfo, UErrorCode& errorCode) { 1232 NULL_ON_ERROR(errorCode); 1233 1234 TimeZone* tz; 1235 if (dateInfo.zoneId.isEmpty()) { 1236 // Floating time value -- use default time zone 1237 tz = TimeZone::createDefault(); 1238 } else { 1239 tz = TimeZone::createTimeZone(dateInfo.zoneId); 1240 } 1241 if (tz == nullptr) { 1242 errorCode = U_MEMORY_ALLOCATION_ERROR; 1243 } 1244 return tz; 1245 } 1246 1247 // Returns true iff `sourceStr` ends in an offset like +03:30 or -06:00 1248 // (This function is just used to determine whether to call tryPatterns() 1249 // or tryTimeZonePatterns(); tryTimeZonePatterns() checks fully that the 1250 // string matches the expected format) 1251 static bool hasTzOffset(const UnicodeString& sourceStr) { 1252 int32_t len = sourceStr.length(); 1253 1254 if (len <= 6) { 1255 return false; 1256 } 1257 return ((sourceStr[len - 6] == PLUS || sourceStr[len - 6] == HYPHEN) 1258 && sourceStr[len - 3] == COLON); 1259 } 1260 1261 // Note: `calendar` option to :datetime not implemented yet; 1262 // Gregorian calendar is assumed 1263 DateInfo StandardFunctions::DateTime::createDateInfoFromString(const UnicodeString& sourceStr, 1264 UErrorCode& errorCode) const { 1265 if (U_FAILURE(errorCode)) { 1266 return {}; 1267 } 1268 1269 UDate absoluteDate; 1270 1271 // Check if the string has a time zone part 1272 int32_t indexOfZ = sourceStr.indexOf('Z'); 1273 int32_t indexOfPlus = sourceStr.lastIndexOf('+'); 1274 int32_t indexOfMinus = sourceStr.lastIndexOf('-'); 1275 int32_t indexOfSign = indexOfPlus > -1 ? indexOfPlus : indexOfMinus; 1276 bool isTzOffset = hasTzOffset(sourceStr); 1277 bool isGMT = indexOfZ > 0; 1278 UnicodeString offsetPart; 1279 bool hasTimeZone = isTzOffset || isGMT; 1280 1281 if (!hasTimeZone) { 1282 // No time zone; parse the date and time 1283 absoluteDate = tryPatterns(sourceStr, errorCode); 1284 if (U_FAILURE(errorCode)) { 1285 return {}; 1286 } 1287 } else { 1288 // Try to split into time zone and non-time-zone parts 1289 UnicodeString dateTimePart; 1290 if (isGMT) { 1291 dateTimePart = sourceStr.tempSubString(0, indexOfZ); 1292 } else { 1293 dateTimePart = sourceStr.tempSubString(0, indexOfSign); 1294 } 1295 // Parse the date from the date/time part 1296 tryPatterns(dateTimePart, errorCode); 1297 // Failure -- can't parse this string 1298 if (U_FAILURE(errorCode)) { 1299 return {}; 1300 } 1301 // Success -- now parse the time zone part 1302 if (isGMT) { 1303 dateTimePart += UnicodeString("GMT"); 1304 absoluteDate = tryTimeZonePatterns(dateTimePart, errorCode); 1305 if (U_FAILURE(errorCode)) { 1306 return {}; 1307 } 1308 } else { 1309 // Try to parse time zone in offset format: [+-]nn:nn 1310 absoluteDate = tryTimeZonePatterns(sourceStr, errorCode); 1311 if (U_FAILURE(errorCode)) { 1312 return {}; 1313 } 1314 offsetPart = sourceStr.tempSubString(indexOfSign, sourceStr.length()); 1315 } 1316 } 1317 1318 // If the time zone was provided, get its canonical ID, 1319 // in order to return it in the DateInfo 1320 UnicodeString canonicalID; 1321 if (hasTimeZone) { 1322 UnicodeString tzID("GMT"); 1323 if (!isGMT) { 1324 tzID += offsetPart; 1325 } 1326 TimeZone::getCanonicalID(tzID, canonicalID, errorCode); 1327 if (U_FAILURE(errorCode)) { 1328 return {}; 1329 } 1330 } 1331 1332 return { absoluteDate, canonicalID }; 1333 } 1334 1335 void formatDateWithDefaults(const Locale& locale, 1336 const DateInfo& dateInfo, 1337 UnicodeString& result, 1338 UErrorCode& errorCode) { 1339 CHECK_ERROR(errorCode); 1340 1341 LocalPointer<DateFormat> df(defaultDateTimeInstance(locale, errorCode)); 1342 CHECK_ERROR(errorCode); 1343 1344 df->adoptTimeZone(createTimeZone(dateInfo, errorCode)); 1345 CHECK_ERROR(errorCode); 1346 df->format(dateInfo.date, result, nullptr, errorCode); 1347 } 1348 1349 FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& toFormat, 1350 FunctionOptions&& opts, 1351 UErrorCode& errorCode) const { 1352 if (U_FAILURE(errorCode)) { 1353 return {}; 1354 } 1355 1356 // Argument must be present 1357 if (!toFormat.canFormat()) { 1358 errorCode = U_MF_OPERAND_MISMATCH_ERROR; 1359 return std::move(toFormat); 1360 } 1361 1362 LocalPointer<DateFormat> df; 1363 Formattable opt; 1364 1365 DateFormat::EStyle dateStyle = DateFormat::kShort; 1366 DateFormat::EStyle timeStyle = DateFormat::kShort; 1367 1368 UnicodeString dateStyleName("dateStyle"); 1369 UnicodeString timeStyleName("timeStyle"); 1370 UnicodeString styleName("style"); 1371 1372 bool hasDateStyleOption = opts.getFunctionOption(dateStyleName, opt); 1373 bool hasTimeStyleOption = opts.getFunctionOption(timeStyleName, opt); 1374 bool noOptions = opts.optionsCount() == 0; 1375 1376 bool useStyle = (type == DateTimeFactory::DateTimeType::DateTime 1377 && (hasDateStyleOption || hasTimeStyleOption 1378 || noOptions)) 1379 || (type != DateTimeFactory::DateTimeType::DateTime); 1380 1381 bool useDate = type == DateTimeFactory::DateTimeType::Date 1382 || (type == DateTimeFactory::DateTimeType::DateTime 1383 && hasDateStyleOption); 1384 bool useTime = type == DateTimeFactory::DateTimeType::Time 1385 || (type == DateTimeFactory::DateTimeType::DateTime 1386 && hasTimeStyleOption); 1387 1388 if (useStyle) { 1389 // Extract style options 1390 if (type == DateTimeFactory::DateTimeType::DateTime) { 1391 // Note that the options-getting has to be repeated across the three cases, 1392 // since `:datetime` uses "dateStyle"/"timeStyle" and `:date` and `:time` 1393 // use "style" 1394 dateStyle = stringToStyle(getFunctionOption(toFormat, opts, dateStyleName), errorCode); 1395 timeStyle = stringToStyle(getFunctionOption(toFormat, opts, timeStyleName), errorCode); 1396 1397 if (useDate && !useTime) { 1398 df.adoptInstead(DateFormat::createDateInstance(dateStyle, locale)); 1399 } else if (useTime && !useDate) { 1400 df.adoptInstead(DateFormat::createTimeInstance(timeStyle, locale)); 1401 } else { 1402 df.adoptInstead(DateFormat::createDateTimeInstance(dateStyle, timeStyle, locale)); 1403 } 1404 } else if (type == DateTimeFactory::DateTimeType::Date) { 1405 dateStyle = stringToStyle(getFunctionOption(toFormat, opts, styleName), errorCode); 1406 df.adoptInstead(DateFormat::createDateInstance(dateStyle, locale)); 1407 } else { 1408 // :time 1409 timeStyle = stringToStyle(getFunctionOption(toFormat, opts, styleName), errorCode); 1410 df.adoptInstead(DateFormat::createTimeInstance(timeStyle, locale)); 1411 } 1412 } else { 1413 // Build up a skeleton based on the field options, then use that to 1414 // create the date formatter 1415 1416 UnicodeString skeleton; 1417 #define ADD_PATTERN(s) skeleton += UnicodeString(s) 1418 if (U_SUCCESS(errorCode)) { 1419 // Year 1420 UnicodeString year = getFunctionOption(toFormat, opts, options::YEAR, errorCode); 1421 if (U_FAILURE(errorCode)) { 1422 errorCode = U_ZERO_ERROR; 1423 } else { 1424 useDate = true; 1425 if (year == options::TWO_DIGIT) { 1426 ADD_PATTERN("YY"); 1427 } else if (year == options::NUMERIC) { 1428 ADD_PATTERN("YYYY"); 1429 } 1430 } 1431 // Month 1432 UnicodeString month = getFunctionOption(toFormat, opts, options::MONTH, errorCode); 1433 if (U_FAILURE(errorCode)) { 1434 errorCode = U_ZERO_ERROR; 1435 } else { 1436 useDate = true; 1437 /* numeric, 2-digit, long, short, narrow */ 1438 if (month == options::LONG) { 1439 ADD_PATTERN("MMMM"); 1440 } else if (month == options::SHORT) { 1441 ADD_PATTERN("MMM"); 1442 } else if (month == options::NARROW) { 1443 ADD_PATTERN("MMMMM"); 1444 } else if (month == options::NUMERIC) { 1445 ADD_PATTERN("M"); 1446 } else if (month == options::TWO_DIGIT) { 1447 ADD_PATTERN("MM"); 1448 } 1449 } 1450 // Weekday 1451 UnicodeString weekday = getFunctionOption(toFormat, opts, options::WEEKDAY, errorCode); 1452 if (U_FAILURE(errorCode)) { 1453 errorCode = U_ZERO_ERROR; 1454 } else { 1455 useDate = true; 1456 if (weekday == options::LONG) { 1457 ADD_PATTERN("EEEE"); 1458 } else if (weekday == options::SHORT) { 1459 ADD_PATTERN("EEEEE"); 1460 } else if (weekday == options::NARROW) { 1461 ADD_PATTERN("EEEEE"); 1462 } 1463 } 1464 // Day 1465 UnicodeString day = getFunctionOption(toFormat, opts, options::DAY, errorCode); 1466 if (U_FAILURE(errorCode)) { 1467 errorCode = U_ZERO_ERROR; 1468 } else { 1469 useDate = true; 1470 if (day == options::NUMERIC) { 1471 ADD_PATTERN("d"); 1472 } else if (day == options::TWO_DIGIT) { 1473 ADD_PATTERN("dd"); 1474 } 1475 } 1476 // Hour 1477 UnicodeString hour = getFunctionOption(toFormat, opts, options::HOUR, errorCode); 1478 if (U_FAILURE(errorCode)) { 1479 errorCode = U_ZERO_ERROR; 1480 } else { 1481 useTime = true; 1482 if (hour == options::NUMERIC) { 1483 ADD_PATTERN("h"); 1484 } else if (hour == options::TWO_DIGIT) { 1485 ADD_PATTERN("hh"); 1486 } 1487 } 1488 // Minute 1489 UnicodeString minute = getFunctionOption(toFormat, opts, options::MINUTE, errorCode); 1490 if (U_FAILURE(errorCode)) { 1491 errorCode = U_ZERO_ERROR; 1492 } else { 1493 useTime = true; 1494 if (minute == options::NUMERIC) { 1495 ADD_PATTERN("m"); 1496 } else if (minute == options::TWO_DIGIT) { 1497 ADD_PATTERN("mm"); 1498 } 1499 } 1500 // Second 1501 UnicodeString second = getFunctionOption(toFormat, opts, options::SECOND, errorCode); 1502 if (U_FAILURE(errorCode)) { 1503 errorCode = U_ZERO_ERROR; 1504 } else { 1505 useTime = true; 1506 if (second == options::NUMERIC) { 1507 ADD_PATTERN("s"); 1508 } else if (second == options::TWO_DIGIT) { 1509 ADD_PATTERN("ss"); 1510 } 1511 } 1512 } 1513 /* 1514 TODO 1515 fractionalSecondDigits 1516 hourCycle 1517 timeZoneName 1518 era 1519 */ 1520 df.adoptInstead(DateFormat::createInstanceForSkeleton(skeleton, errorCode)); 1521 } 1522 1523 if (U_FAILURE(errorCode)) { 1524 return {}; 1525 } 1526 if (!df.isValid()) { 1527 errorCode = U_MEMORY_ALLOCATION_ERROR; 1528 return {}; 1529 } 1530 1531 UnicodeString result; 1532 const Formattable& source = toFormat.asFormattable(); 1533 switch (source.getType()) { 1534 case UFMT_STRING: { 1535 // Lazily initialize date parsers used for parsing date literals 1536 initDateParsers(errorCode); 1537 if (U_FAILURE(errorCode)) { 1538 return {}; 1539 } 1540 1541 const UnicodeString& sourceStr = source.getString(errorCode); 1542 U_ASSERT(U_SUCCESS(errorCode)); 1543 1544 DateInfo dateInfo = createDateInfoFromString(sourceStr, errorCode); 1545 if (U_FAILURE(errorCode)) { 1546 errorCode = U_MF_OPERAND_MISMATCH_ERROR; 1547 return {}; 1548 } 1549 df->adoptTimeZone(createTimeZone(dateInfo, errorCode)); 1550 1551 // Use the parsed date as the source value 1552 // in the returned FormattedPlaceholder; this is necessary 1553 // so the date can be re-formatted 1554 df->format(dateInfo.date, result, 0, errorCode); 1555 toFormat = FormattedPlaceholder(message2::Formattable(std::move(dateInfo)), 1556 toFormat.getFallback()); 1557 break; 1558 } 1559 case UFMT_DATE: { 1560 const DateInfo* dateInfo = source.getDate(errorCode); 1561 if (U_SUCCESS(errorCode)) { 1562 // If U_SUCCESS(errorCode), then source.getDate() returned 1563 // a non-null pointer 1564 df->adoptTimeZone(createTimeZone(*dateInfo, errorCode)); 1565 df->format(dateInfo->date, result, 0, errorCode); 1566 if (U_FAILURE(errorCode)) { 1567 if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { 1568 errorCode = U_MF_OPERAND_MISMATCH_ERROR; 1569 } 1570 } 1571 } 1572 break; 1573 } 1574 // Any other cases are an error 1575 default: { 1576 errorCode = U_MF_OPERAND_MISMATCH_ERROR; 1577 break; 1578 } 1579 } 1580 if (U_FAILURE(errorCode)) { 1581 return {}; 1582 } 1583 return FormattedPlaceholder(toFormat, std::move(opts), FormattedValue(std::move(result))); 1584 } 1585 1586 StandardFunctions::DateTimeFactory::~DateTimeFactory() {} 1587 StandardFunctions::DateTime::~DateTime() {} 1588 1589 // --------- TextFactory 1590 1591 Selector* StandardFunctions::TextFactory::createSelector(const Locale& locale, UErrorCode& errorCode) const { 1592 Selector* result = new TextSelector(locale); 1593 if (result == nullptr) { 1594 errorCode = U_MEMORY_ALLOCATION_ERROR; 1595 return nullptr; 1596 } 1597 return result; 1598 } 1599 1600 void StandardFunctions::TextSelector::selectKey(FormattedPlaceholder&& toFormat, 1601 FunctionOptions&& opts, 1602 const UnicodeString* keys, 1603 int32_t keysLen, 1604 UnicodeString* prefs, 1605 int32_t& prefsLen, 1606 UErrorCode& errorCode) const { 1607 // No options 1608 (void) opts; 1609 1610 CHECK_ERROR(errorCode); 1611 1612 // Just compares the key and value as strings 1613 1614 // Argument must be present 1615 if (!toFormat.canFormat()) { 1616 errorCode = U_MF_SELECTOR_ERROR; 1617 return; 1618 } 1619 1620 prefsLen = 0; 1621 1622 // Convert to string 1623 const UnicodeString& formattedValue = toFormat.formatToString(locale, errorCode); 1624 if (U_FAILURE(errorCode)) { 1625 return; 1626 } 1627 // Normalize result 1628 UnicodeString normalized = normalizeNFC(formattedValue); 1629 1630 for (int32_t i = 0; i < keysLen; i++) { 1631 if (keys[i] == normalized) { 1632 prefs[0] = keys[i]; 1633 prefsLen = 1; 1634 break; 1635 } 1636 } 1637 } 1638 1639 StandardFunctions::TextFactory::~TextFactory() {} 1640 StandardFunctions::TextSelector::~TextSelector() {} 1641 1642 // ------------ TestFormatFactory 1643 1644 Formatter* StandardFunctions::TestFormatFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { 1645 NULL_ON_ERROR(errorCode); 1646 1647 // Results are not locale-dependent 1648 (void) locale; 1649 1650 Formatter* result = new TestFormat(); 1651 if (result == nullptr) { 1652 errorCode = U_MEMORY_ALLOCATION_ERROR; 1653 } 1654 return result; 1655 } 1656 1657 StandardFunctions::TestFormatFactory::~TestFormatFactory() {} 1658 StandardFunctions::TestFormat::~TestFormat() {} 1659 1660 // Extract numeric value from a Formattable or, if it's a string, 1661 // parse it as a number according to the MF2 `number-literal` grammar production 1662 double formattableToNumber(const Formattable& arg, UErrorCode& status) { 1663 if (U_FAILURE(status)) { 1664 return 0; 1665 } 1666 1667 double result = 0; 1668 1669 switch (arg.getType()) { 1670 case UFMT_DOUBLE: { 1671 result = arg.getDouble(status); 1672 U_ASSERT(U_SUCCESS(status)); 1673 break; 1674 } 1675 case UFMT_LONG: { 1676 result = (double) arg.getLong(status); 1677 U_ASSERT(U_SUCCESS(status)); 1678 break; 1679 } 1680 case UFMT_INT64: { 1681 result = (double) arg.getInt64(status); 1682 U_ASSERT(U_SUCCESS(status)); 1683 break; 1684 } 1685 case UFMT_STRING: { 1686 // Try to parse the string as a number 1687 result = parseNumberLiteral(arg, status); 1688 if (U_FAILURE(status)) { 1689 status = U_MF_OPERAND_MISMATCH_ERROR; 1690 } 1691 break; 1692 } 1693 default: { 1694 // Other types can't be parsed as a number 1695 status = U_MF_OPERAND_MISMATCH_ERROR; 1696 break; 1697 } 1698 } 1699 return result; 1700 } 1701 1702 1703 /* static */ void StandardFunctions::TestFormat::testFunctionParameters(const FormattedPlaceholder& arg, 1704 const FunctionOptions& options, 1705 int32_t& decimalPlaces, 1706 bool& failsFormat, 1707 bool& failsSelect, 1708 double& input, 1709 UErrorCode& status) { 1710 CHECK_ERROR(status); 1711 1712 // 1. Let DecimalPlaces be 0. 1713 decimalPlaces = 0; 1714 1715 // 2. Let FailsFormat be false. 1716 failsFormat = false; 1717 1718 // 3. Let FailsSelect be false. 1719 failsSelect = false; 1720 1721 // 4. Let arg be the resolved value of the expression operand. 1722 // (already true) 1723 1724 // Step 5 omitted because composition isn't fully implemented yet 1725 // 6. Else if arg is a numerical value or a string matching the number-literal production, then 1726 input = formattableToNumber(arg.asFormattable(), status); 1727 if (U_FAILURE(status)) { 1728 // 7. Else, 1729 // 7i. Emit "bad-input" Resolution Error. 1730 status = U_MF_OPERAND_MISMATCH_ERROR; 1731 // 7ii. Use a fallback value as the resolved value of the expression. 1732 // Further steps of this algorithm are not followed. 1733 } 1734 // 8. If the decimalPlaces option is set, then 1735 Formattable opt; 1736 if (options.getFunctionOption(options::DECIMAL_PLACES, opt)) { 1737 // 8i. If its value resolves to a numerical integer value 0 or 1 1738 // or their corresponding string representations '0' or '1', then 1739 double decimalPlacesInput = formattableToNumber(opt, status); 1740 if (U_SUCCESS(status)) { 1741 if (decimalPlacesInput == 0 || decimalPlacesInput == 1) { 1742 // 8ia. Set DecimalPlaces to be the numerical value of the option. 1743 decimalPlaces = decimalPlacesInput; 1744 } 1745 } 1746 // 8ii. Else if its value is not an unresolved value set by option resolution, 1747 else { 1748 // 8iia. Emit "bad-option" Resolution Error. 1749 status = U_MF_BAD_OPTION; 1750 // 8iib. Use a fallback value as the resolved value of the expression. 1751 } 1752 } 1753 // 9. If the fails option is set, then 1754 Formattable failsOpt; 1755 if (options.getFunctionOption(options::FAILS, failsOpt)) { 1756 UnicodeString failsString = failsOpt.getString(status); 1757 if (U_SUCCESS(status)) { 1758 // 9i. If its value resolves to the string 'always', then 1759 if (failsString == u"always") { 1760 // 9ia. Set FailsFormat to be true 1761 failsFormat = true; 1762 // 9ib. Set FailsSelect to be true. 1763 failsSelect = true; 1764 } 1765 // 9ii. Else if its value resolves to the string "format", then 1766 else if (failsString == u"format") { 1767 // 9ia. Set FailsFormat to be true 1768 failsFormat = true; 1769 } 1770 // 9iii. Else if its value resolves to the string "select", then 1771 else if (failsString == u"select") { 1772 // 9iiia. Set FailsSelect to be true. 1773 failsSelect = true; 1774 } 1775 // 9iv. Else if its value does not resolve to the string "never", then 1776 else if (failsString != u"never") { 1777 // 9iv(a). Emit "bad-option" Resolution Error. 1778 status = U_MF_BAD_OPTION; 1779 } 1780 } else { 1781 // 9iv. again 1782 status = U_MF_BAD_OPTION; 1783 } 1784 } 1785 } 1786 1787 FormattedPlaceholder StandardFunctions::TestFormat::format(FormattedPlaceholder&& arg, 1788 FunctionOptions&& options, 1789 UErrorCode& status) const{ 1790 1791 int32_t decimalPlaces; 1792 bool failsFormat; 1793 bool failsSelect; 1794 double input; 1795 1796 testFunctionParameters(arg, options, decimalPlaces, 1797 failsFormat, failsSelect, input, status); 1798 if (U_FAILURE(status)) { 1799 return FormattedPlaceholder(arg.getFallback()); 1800 } 1801 1802 // If FailsFormat is true, attempting to format the placeholder to any 1803 // formatting target will fail. 1804 if (failsFormat) { 1805 status = U_MF_FORMATTING_ERROR; 1806 return FormattedPlaceholder(arg.getFallback()); 1807 } 1808 UnicodeString result; 1809 // When :test:function is used as a formatter, a placeholder resolving to a value 1810 // with a :test:function expression is formatted as a concatenation of the following parts: 1811 // 1. If Input is less than 0, the character - U+002D Hyphen-Minus. 1812 if (input < 0) { 1813 result += HYPHEN; 1814 } 1815 // 2. The truncated absolute integer value of Input, i.e. floor(abs(Input)), formatted as a 1816 // sequence of decimal digit characters (U+0030...U+0039). 1817 char buffer[256]; 1818 bool ignore; 1819 int ignoreLen; 1820 int ignorePoint; 1821 double_conversion::DoubleToStringConverter::DoubleToAscii(floor(abs(input)), 1822 double_conversion::DoubleToStringConverter::DtoaMode::SHORTEST, 1823 0, 1824 buffer, 1825 256, 1826 &ignore, 1827 &ignoreLen, 1828 &ignorePoint); 1829 result += UnicodeString(buffer); 1830 // 3. If DecimalPlaces is 1, then 1831 if (decimalPlaces == 1) { 1832 // 3i. The character . U+002E Full Stop. 1833 result += u"."; 1834 // 3ii. The single decimal digit character representing the value 1835 // floor((abs(Input) - floor(abs(Input))) * 10) 1836 int32_t val = floor((abs(input) - floor(abs(input)) * 10)); 1837 result += digitToChar(val, status); 1838 U_ASSERT(U_SUCCESS(status)); 1839 } 1840 return FormattedPlaceholder(result); 1841 } 1842 1843 // ------------ TestSelectFactory 1844 1845 StandardFunctions::TestSelectFactory::~TestSelectFactory() {} 1846 StandardFunctions::TestSelect::~TestSelect() {} 1847 1848 Selector* StandardFunctions::TestSelectFactory::createSelector(const Locale& locale, 1849 UErrorCode& errorCode) const { 1850 NULL_ON_ERROR(errorCode); 1851 1852 // Results are not locale-dependent 1853 (void) locale; 1854 1855 Selector* result = new TestSelect(); 1856 if (result == nullptr) { 1857 errorCode = U_MEMORY_ALLOCATION_ERROR; 1858 } 1859 return result; 1860 } 1861 1862 void StandardFunctions::TestSelect::selectKey(FormattedPlaceholder&& val, 1863 FunctionOptions&& options, 1864 const UnicodeString* keys, 1865 int32_t keysLen, 1866 UnicodeString* prefs, 1867 int32_t& prefsLen, 1868 UErrorCode& status) const { 1869 int32_t decimalPlaces; 1870 bool failsFormat; 1871 bool failsSelect; 1872 double input; 1873 1874 TestFormat::testFunctionParameters(val, options, decimalPlaces, 1875 failsFormat, failsSelect, input, status); 1876 1877 if (U_FAILURE(status)) { 1878 return; 1879 } 1880 1881 if (failsSelect) { 1882 status = U_MF_SELECTOR_ERROR; 1883 return; 1884 } 1885 1886 // If the Input is 1 and DecimalPlaces is 1, the method will return some slice 1887 // of the list « '1.0', '1' », depending on whether those values are included in keys. 1888 bool include1point0 = false; 1889 bool include1 = false; 1890 if (input == 1 && decimalPlaces == 1) { 1891 include1point0 = true; 1892 include1 = true; 1893 } else if (input == 1 && decimalPlaces == 0) { 1894 include1 = true; 1895 } 1896 1897 // If the Input is 1 and DecimalPlaces is 0, the method will return the list « '1' » if 1898 // keys includes '1', or an empty list otherwise. 1899 // If the Input is any other value, the method will return an empty list. 1900 for (int32_t i = 0; i < keysLen; i++) { 1901 if ((keys[i] == u"1" && include1) 1902 || (keys[i] == u"1.0" && include1point0)) { 1903 prefs[prefsLen] = keys[i]; 1904 prefsLen++; 1905 } 1906 } 1907 } 1908 1909 } // namespace message2 1910 U_NAMESPACE_END 1911 1912 #endif /* #if !UCONFIG_NO_MF2 */ 1913 1914 #endif /* #if !UCONFIG_NO_FORMATTING */ 1915 1916 #endif /* #if !UCONFIG_NO_NORMALIZATION */