smpdtfmt.cpp (177678B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 1997-2016, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ******************************************************************************* 8 * 9 * File SMPDTFMT.CPP 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 02/19/97 aliu Converted from java. 15 * 03/31/97 aliu Modified extensively to work with 50 locales. 16 * 04/01/97 aliu Added support for centuries. 17 * 07/09/97 helena Made ParsePosition into a class. 18 * 07/21/98 stephen Added initializeDefaultCentury. 19 * Removed getZoneIndex (added in DateFormatSymbols) 20 * Removed subParseLong 21 * Removed chk 22 * 02/22/99 stephen Removed character literals for EBCDIC safety 23 * 10/14/99 aliu Updated 2-digit year parsing so that only "00" thru 24 * "99" are recognized. {j28 4182066} 25 * 11/15/99 weiv Added support for week of year/day of week format 26 ******************************************************************************** 27 */ 28 29 #define ZID_KEY_MAX 128 30 31 #include "unicode/utypes.h" 32 33 #if !UCONFIG_NO_FORMATTING 34 #include "unicode/smpdtfmt.h" 35 #include "unicode/dtfmtsym.h" 36 #include "unicode/ures.h" 37 #include "unicode/msgfmt.h" 38 #include "unicode/calendar.h" 39 #include "unicode/gregocal.h" 40 #include "unicode/timezone.h" 41 #include "unicode/decimfmt.h" 42 #include "unicode/dcfmtsym.h" 43 #include "unicode/uchar.h" 44 #include "unicode/uniset.h" 45 #include "unicode/ustring.h" 46 #include "unicode/basictz.h" 47 #include "unicode/simpleformatter.h" 48 #include "unicode/simplenumberformatter.h" 49 #include "unicode/simpletz.h" 50 #include "unicode/rbtz.h" 51 #include "unicode/tzfmt.h" 52 #include "unicode/ucasemap.h" 53 #include "unicode/utf16.h" 54 #include "unicode/vtzone.h" 55 #include "unicode/udisplaycontext.h" 56 #include "unicode/brkiter.h" 57 #include "unicode/rbnf.h" 58 #include "unicode/dtptngen.h" 59 #include "uresimp.h" 60 #include "olsontz.h" 61 #include "patternprops.h" 62 #include "fphdlimp.h" 63 #include "hebrwcal.h" 64 #include "cstring.h" 65 #include "uassert.h" 66 #include "cmemory.h" 67 #include "umutex.h" 68 #include "mutex.h" 69 #include <float.h> 70 #include "smpdtfst.h" 71 #include "sharednumberformat.h" 72 #include "ucasemap_imp.h" 73 #include "ustr_imp.h" 74 #include "charstr.h" 75 #include "uvector.h" 76 #include "cstr.h" 77 #include "dayperiodrules.h" 78 #include "tznames_impl.h" // ZONE_NAME_U16_MAX 79 #include "number_utypes.h" 80 #include "chnsecal.h" 81 #include "dangical.h" 82 #include "japancal.h" 83 #include <typeinfo> 84 85 #if defined( U_DEBUG_CALSVC ) || defined (U_DEBUG_CAL) 86 #include <stdio.h> 87 #endif 88 89 // ***************************************************************************** 90 // class SimpleDateFormat 91 // ***************************************************************************** 92 93 U_NAMESPACE_BEGIN 94 95 /** 96 * Last-resort string to use for "GMT" when constructing time zone strings. 97 */ 98 // For time zones that have no names, use strings GMT+minutes and 99 // GMT-minutes. For instance, in France the time zone is GMT+60. 100 // Also accepted are GMT+H:MM or GMT-H:MM. 101 // Currently not being used 102 //static const char16_t gGmt[] = {0x0047, 0x004D, 0x0054, 0x0000}; // "GMT" 103 //static const char16_t gGmtPlus[] = {0x0047, 0x004D, 0x0054, 0x002B, 0x0000}; // "GMT+" 104 //static const char16_t gGmtMinus[] = {0x0047, 0x004D, 0x0054, 0x002D, 0x0000}; // "GMT-" 105 //static const char16_t gDefGmtPat[] = {0x0047, 0x004D, 0x0054, 0x007B, 0x0030, 0x007D, 0x0000}; /* GMT{0} */ 106 //static const char16_t gDefGmtNegHmsPat[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* -HH:mm:ss */ 107 //static const char16_t gDefGmtNegHmPat[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* -HH:mm */ 108 //static const char16_t gDefGmtPosHmsPat[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* +HH:mm:ss */ 109 //static const char16_t gDefGmtPosHmPat[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* +HH:mm */ 110 //static const char16_t gUt[] = {0x0055, 0x0054, 0x0000}; // "UT" 111 //static const char16_t gUtc[] = {0x0055, 0x0054, 0x0043, 0x0000}; // "UT" 112 113 typedef enum GmtPatSize { 114 kGmtLen = 3, 115 kGmtPatLen = 6, 116 kNegHmsLen = 9, 117 kNegHmLen = 6, 118 kPosHmsLen = 9, 119 kPosHmLen = 6, 120 kUtLen = 2, 121 kUtcLen = 3 122 } GmtPatSize; 123 124 // Stuff needed for numbering system overrides 125 126 typedef enum OvrStrType { 127 kOvrStrDate = 0, 128 kOvrStrTime = 1, 129 kOvrStrBoth = 2 130 } OvrStrType; 131 132 static const UDateFormatField kDateFields[] = { 133 UDAT_YEAR_FIELD, 134 UDAT_MONTH_FIELD, 135 UDAT_DATE_FIELD, 136 UDAT_DAY_OF_YEAR_FIELD, 137 UDAT_DAY_OF_WEEK_IN_MONTH_FIELD, 138 UDAT_WEEK_OF_YEAR_FIELD, 139 UDAT_WEEK_OF_MONTH_FIELD, 140 UDAT_YEAR_WOY_FIELD, 141 UDAT_EXTENDED_YEAR_FIELD, 142 UDAT_JULIAN_DAY_FIELD, 143 UDAT_STANDALONE_DAY_FIELD, 144 UDAT_STANDALONE_MONTH_FIELD, 145 UDAT_QUARTER_FIELD, 146 UDAT_STANDALONE_QUARTER_FIELD, 147 UDAT_YEAR_NAME_FIELD, 148 UDAT_RELATED_YEAR_FIELD }; 149 static const int8_t kDateFieldsCount = 16; 150 151 static const UDateFormatField kTimeFields[] = { 152 UDAT_HOUR_OF_DAY1_FIELD, 153 UDAT_HOUR_OF_DAY0_FIELD, 154 UDAT_MINUTE_FIELD, 155 UDAT_SECOND_FIELD, 156 UDAT_FRACTIONAL_SECOND_FIELD, 157 UDAT_HOUR1_FIELD, 158 UDAT_HOUR0_FIELD, 159 UDAT_MILLISECONDS_IN_DAY_FIELD, 160 UDAT_TIMEZONE_RFC_FIELD, 161 UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD }; 162 static const int8_t kTimeFieldsCount = 10; 163 164 165 // This is a pattern-of-last-resort used when we can't load a usable pattern out 166 // of a resource. 167 static const char16_t gDefaultPattern[] = 168 { 169 0x79, 0x4D, 0x4D, 0x64, 0x64, 0x20, 0x68, 0x68, 0x3A, 0x6D, 0x6D, 0x20, 0x61, 0 170 }; /* "yMMdd hh:mm a" */ 171 172 // This prefix is designed to NEVER MATCH real text, in order to 173 // suppress the parsing of negative numbers. Adjust as needed (if 174 // this becomes valid Unicode). 175 static const char16_t SUPPRESS_NEGATIVE_PREFIX[] = {0xAB00, 0}; 176 177 /** 178 * These are the tags we expect to see in normal resource bundle files associated 179 * with a locale. 180 */ 181 static const char16_t QUOTE = 0x27; // Single quote 182 183 /* 184 * The field range check bias for each UDateFormatField. 185 * The bias is added to the minimum and maximum values 186 * before they are compared to the parsed number. 187 * For example, the calendar stores zero-based month numbers 188 * but the parsed month numbers start at 1, so the bias is 1. 189 * 190 * A value of -1 means that the value is not checked. 191 */ 192 static const int32_t gFieldRangeBias[] = { 193 -1, // 'G' - UDAT_ERA_FIELD 194 -1, // 'y' - UDAT_YEAR_FIELD 195 1, // 'M' - UDAT_MONTH_FIELD 196 0, // 'd' - UDAT_DATE_FIELD 197 -1, // 'k' - UDAT_HOUR_OF_DAY1_FIELD 198 -1, // 'H' - UDAT_HOUR_OF_DAY0_FIELD 199 0, // 'm' - UDAT_MINUTE_FIELD 200 0, // 's' - UDAT_SECOND_FIELD 201 -1, // 'S' - UDAT_FRACTIONAL_SECOND_FIELD (0-999?) 202 -1, // 'E' - UDAT_DAY_OF_WEEK_FIELD (1-7?) 203 -1, // 'D' - UDAT_DAY_OF_YEAR_FIELD (1 - 366?) 204 -1, // 'F' - UDAT_DAY_OF_WEEK_IN_MONTH_FIELD (1-5?) 205 -1, // 'w' - UDAT_WEEK_OF_YEAR_FIELD (1-52?) 206 -1, // 'W' - UDAT_WEEK_OF_MONTH_FIELD (1-5?) 207 -1, // 'a' - UDAT_AM_PM_FIELD 208 -1, // 'h' - UDAT_HOUR1_FIELD 209 -1, // 'K' - UDAT_HOUR0_FIELD 210 -1, // 'z' - UDAT_TIMEZONE_FIELD 211 -1, // 'Y' - UDAT_YEAR_WOY_FIELD 212 -1, // 'e' - UDAT_DOW_LOCAL_FIELD 213 -1, // 'u' - UDAT_EXTENDED_YEAR_FIELD 214 -1, // 'g' - UDAT_JULIAN_DAY_FIELD 215 -1, // 'A' - UDAT_MILLISECONDS_IN_DAY_FIELD 216 -1, // 'Z' - UDAT_TIMEZONE_RFC_FIELD 217 -1, // 'v' - UDAT_TIMEZONE_GENERIC_FIELD 218 0, // 'c' - UDAT_STANDALONE_DAY_FIELD 219 1, // 'L' - UDAT_STANDALONE_MONTH_FIELD 220 -1, // 'Q' - UDAT_QUARTER_FIELD (1-4?) 221 -1, // 'q' - UDAT_STANDALONE_QUARTER_FIELD 222 -1, // 'V' - UDAT_TIMEZONE_SPECIAL_FIELD 223 -1, // 'U' - UDAT_YEAR_NAME_FIELD 224 -1, // 'O' - UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD 225 -1, // 'X' - UDAT_TIMEZONE_ISO_FIELD 226 -1, // 'x' - UDAT_TIMEZONE_ISO_LOCAL_FIELD 227 -1, // 'r' - UDAT_RELATED_YEAR_FIELD 228 #if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR 229 -1, // ':' - UDAT_TIME_SEPARATOR_FIELD 230 #else 231 -1, // (no pattern character currently) - UDAT_TIME_SEPARATOR_FIELD 232 #endif 233 }; 234 235 // When calendar uses hebr numbering (i.e. he@calendar=hebrew), 236 // offset the years within the current millennium down to 1-999 237 static const int32_t HEBREW_CAL_CUR_MILLENIUM_START_YEAR = 5000; 238 static const int32_t HEBREW_CAL_CUR_MILLENIUM_END_YEAR = 6000; 239 240 /** 241 * Maximum range for detecting daylight offset of a time zone when parsed time zone 242 * string indicates it's daylight saving time, but the detected time zone does not 243 * observe daylight saving time at the parsed date. 244 */ 245 static const double MAX_DAYLIGHT_DETECTION_RANGE = 30*365*24*60*60*1000.0; 246 247 static UMutex LOCK; 248 249 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleDateFormat) 250 251 SimpleDateFormat::NSOverride::~NSOverride() { 252 if (snf != nullptr) { 253 snf->removeRef(); 254 } 255 } 256 257 258 void SimpleDateFormat::NSOverride::free() { 259 NSOverride *cur = this; 260 while (cur) { 261 NSOverride *next_temp = cur->next; 262 delete cur; 263 cur = next_temp; 264 } 265 } 266 267 // no matter what the locale's default number format looked like, we want 268 // to modify it so that it doesn't use thousands separators, doesn't always 269 // show the decimal point, and recognizes integers only when parsing 270 static void fixNumberFormatForDates(NumberFormat &nf) { 271 nf.setGroupingUsed(false); 272 DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(&nf); 273 if (decfmt != nullptr) { 274 decfmt->setDecimalSeparatorAlwaysShown(false); 275 } 276 nf.setParseIntegerOnly(true); 277 nf.setMinimumFractionDigits(0); // To prevent "Jan 1.00, 1997.00" 278 } 279 280 static const SharedNumberFormat *createSharedNumberFormat( 281 NumberFormat *nfToAdopt) { 282 fixNumberFormatForDates(*nfToAdopt); 283 const SharedNumberFormat *result = new SharedNumberFormat(nfToAdopt); 284 if (result == nullptr) { 285 delete nfToAdopt; 286 } 287 return result; 288 } 289 290 static const SharedNumberFormat *createSharedNumberFormat( 291 const Locale &loc, UErrorCode &status) { 292 NumberFormat *nf = NumberFormat::createInstance(loc, status); 293 if (U_FAILURE(status)) { 294 return nullptr; 295 } 296 const SharedNumberFormat *result = createSharedNumberFormat(nf); 297 if (result == nullptr) { 298 status = U_MEMORY_ALLOCATION_ERROR; 299 } 300 return result; 301 } 302 303 static const SharedNumberFormat **allocSharedNumberFormatters() { 304 const SharedNumberFormat** result = static_cast<const SharedNumberFormat**>( 305 uprv_malloc(UDAT_FIELD_COUNT * sizeof(const SharedNumberFormat*))); 306 if (result == nullptr) { 307 return nullptr; 308 } 309 for (int32_t i = 0; i < UDAT_FIELD_COUNT; ++i) { 310 result[i] = nullptr; 311 } 312 return result; 313 } 314 315 static void freeSharedNumberFormatters(const SharedNumberFormat ** list) { 316 for (int32_t i = 0; i < UDAT_FIELD_COUNT; ++i) { 317 SharedObject::clearPtr(list[i]); 318 } 319 uprv_free(list); 320 } 321 322 const NumberFormat *SimpleDateFormat::getNumberFormatByIndex( 323 UDateFormatField index) const { 324 if (fSharedNumberFormatters == nullptr || 325 fSharedNumberFormatters[index] == nullptr) { 326 return fNumberFormat; 327 } 328 return &(**fSharedNumberFormatters[index]); 329 } 330 331 //---------------------------------------------------------------------- 332 333 SimpleDateFormat::~SimpleDateFormat() 334 { 335 delete fSymbols; 336 if (fSharedNumberFormatters) { 337 freeSharedNumberFormatters(fSharedNumberFormatters); 338 } 339 delete fTimeZoneFormat; 340 delete fSimpleNumberFormatter; 341 342 #if !UCONFIG_NO_BREAK_ITERATION 343 delete fCapitalizationBrkIter; 344 #endif 345 } 346 347 //---------------------------------------------------------------------- 348 349 SimpleDateFormat::SimpleDateFormat(UErrorCode& status) 350 : fLocale(Locale::getDefault()) 351 { 352 initializeBooleanAttributes(); 353 construct(kShort, static_cast<EStyle>(kShort + kDateOffset), fLocale, status); 354 initializeDefaultCentury(); 355 } 356 357 //---------------------------------------------------------------------- 358 359 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, 360 UErrorCode &status) 361 : fPattern(pattern), 362 fLocale(Locale::getDefault()) 363 { 364 fDateOverride.setToBogus(); 365 fTimeOverride.setToBogus(); 366 initializeBooleanAttributes(); 367 initializeCalendar(nullptr,fLocale,status); 368 fSymbols = DateFormatSymbols::createForLocale(fLocale, status); 369 initialize(fLocale, status); 370 initializeDefaultCentury(); 371 372 } 373 //---------------------------------------------------------------------- 374 375 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, 376 const UnicodeString& override, 377 UErrorCode &status) 378 : fPattern(pattern), 379 fLocale(Locale::getDefault()) 380 { 381 fDateOverride.setTo(override); 382 fTimeOverride.setToBogus(); 383 initializeBooleanAttributes(); 384 initializeCalendar(nullptr,fLocale,status); 385 fSymbols = DateFormatSymbols::createForLocale(fLocale, status); 386 initialize(fLocale, status); 387 initializeDefaultCentury(); 388 389 processOverrideString(fLocale,override,kOvrStrBoth,status); 390 391 } 392 393 //---------------------------------------------------------------------- 394 395 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, 396 const Locale& locale, 397 UErrorCode& status) 398 : fPattern(pattern), 399 fLocale(locale) 400 { 401 402 fDateOverride.setToBogus(); 403 fTimeOverride.setToBogus(); 404 initializeBooleanAttributes(); 405 406 initializeCalendar(nullptr,fLocale,status); 407 fSymbols = DateFormatSymbols::createForLocale(fLocale, status); 408 initialize(fLocale, status); 409 initializeDefaultCentury(); 410 } 411 412 //---------------------------------------------------------------------- 413 414 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, 415 const UnicodeString& override, 416 const Locale& locale, 417 UErrorCode& status) 418 : fPattern(pattern), 419 fLocale(locale) 420 { 421 422 fDateOverride.setTo(override); 423 fTimeOverride.setToBogus(); 424 initializeBooleanAttributes(); 425 426 initializeCalendar(nullptr,fLocale,status); 427 fSymbols = DateFormatSymbols::createForLocale(fLocale, status); 428 initialize(fLocale, status); 429 initializeDefaultCentury(); 430 431 processOverrideString(locale,override,kOvrStrBoth,status); 432 433 } 434 435 //---------------------------------------------------------------------- 436 437 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, 438 DateFormatSymbols* symbolsToAdopt, 439 UErrorCode& status) 440 : fPattern(pattern), 441 fLocale(Locale::getDefault()), 442 fSymbols(symbolsToAdopt) 443 { 444 445 fDateOverride.setToBogus(); 446 fTimeOverride.setToBogus(); 447 initializeBooleanAttributes(); 448 449 initializeCalendar(nullptr,fLocale,status); 450 initialize(fLocale, status); 451 initializeDefaultCentury(); 452 } 453 454 //---------------------------------------------------------------------- 455 456 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, 457 const DateFormatSymbols& symbols, 458 UErrorCode& status) 459 : fPattern(pattern), 460 fLocale(Locale::getDefault()), 461 fSymbols(new DateFormatSymbols(symbols)) 462 { 463 464 fDateOverride.setToBogus(); 465 fTimeOverride.setToBogus(); 466 initializeBooleanAttributes(); 467 468 initializeCalendar(nullptr, fLocale, status); 469 initialize(fLocale, status); 470 initializeDefaultCentury(); 471 } 472 473 //---------------------------------------------------------------------- 474 475 // Not for public consumption; used by DateFormat 476 SimpleDateFormat::SimpleDateFormat(EStyle timeStyle, 477 EStyle dateStyle, 478 const Locale& locale, 479 UErrorCode& status) 480 : fLocale(locale) 481 { 482 initializeBooleanAttributes(); 483 construct(timeStyle, dateStyle, fLocale, status); 484 if(U_SUCCESS(status)) { 485 initializeDefaultCentury(); 486 } 487 } 488 489 //---------------------------------------------------------------------- 490 491 /** 492 * Not for public consumption; used by DateFormat. This constructor 493 * never fails. If the resource data is not available, it uses the 494 * the last resort symbols. 495 */ 496 SimpleDateFormat::SimpleDateFormat(const Locale& locale, 497 UErrorCode& status) 498 : fPattern(gDefaultPattern), 499 fLocale(locale) 500 { 501 if (U_FAILURE(status)) return; 502 initializeBooleanAttributes(); 503 initializeCalendar(nullptr, fLocale, status); 504 fSymbols = DateFormatSymbols::createForLocale(fLocale, status); 505 if (U_FAILURE(status)) 506 { 507 status = U_ZERO_ERROR; 508 delete fSymbols; 509 // This constructor doesn't fail; it uses last resort data 510 fSymbols = new DateFormatSymbols(status); 511 /* test for nullptr */ 512 if (fSymbols == nullptr) { 513 status = U_MEMORY_ALLOCATION_ERROR; 514 return; 515 } 516 } 517 518 fDateOverride.setToBogus(); 519 fTimeOverride.setToBogus(); 520 521 initialize(fLocale, status); 522 if(U_SUCCESS(status)) { 523 initializeDefaultCentury(); 524 } 525 } 526 527 //---------------------------------------------------------------------- 528 529 SimpleDateFormat::SimpleDateFormat(const SimpleDateFormat& other) 530 : DateFormat(other), 531 fLocale(other.fLocale) 532 { 533 initializeBooleanAttributes(); 534 *this = other; 535 } 536 537 //---------------------------------------------------------------------- 538 539 SimpleDateFormat& SimpleDateFormat::operator=(const SimpleDateFormat& other) 540 { 541 if (this == &other) { 542 return *this; 543 } 544 545 // fSimpleNumberFormatter references fNumberFormatter, delete it 546 // before we call the = operator which may invalidate fNumberFormatter 547 delete fSimpleNumberFormatter; 548 fSimpleNumberFormatter = nullptr; 549 550 DateFormat::operator=(other); 551 fDateOverride = other.fDateOverride; 552 fTimeOverride = other.fTimeOverride; 553 554 delete fSymbols; 555 fSymbols = nullptr; 556 557 if (other.fSymbols) 558 fSymbols = new DateFormatSymbols(*other.fSymbols); 559 560 fDefaultCenturyStart = other.fDefaultCenturyStart; 561 fDefaultCenturyStartYear = other.fDefaultCenturyStartYear; 562 fHaveDefaultCentury = other.fHaveDefaultCentury; 563 564 fPattern = other.fPattern; 565 fHasMinute = other.fHasMinute; 566 fHasSecond = other.fHasSecond; 567 568 fLocale = other.fLocale; 569 570 // TimeZoneFormat can now be set independently via setter. 571 // If it is nullptr, it will be lazily initialized from locale. 572 delete fTimeZoneFormat; 573 fTimeZoneFormat = nullptr; 574 TimeZoneFormat *otherTZFormat; 575 { 576 // Synchronization is required here, when accessing other.fTimeZoneFormat, 577 // because another thread may be concurrently executing other.tzFormat(), 578 // a logically const function that lazily creates other.fTimeZoneFormat. 579 // 580 // Without synchronization, reordered memory writes could allow us 581 // to see a non-null fTimeZoneFormat before the object itself was 582 // fully initialized. In case of a race, it doesn't matter whether 583 // we see a null or a fully initialized other.fTimeZoneFormat, 584 // only that we avoid seeing a partially initialized object. 585 // 586 // Once initialized, no const function can modify fTimeZoneFormat, 587 // meaning that once we have safely grabbed the other.fTimeZoneFormat 588 // pointer, continued synchronization is not required to use it. 589 Mutex m(&LOCK); 590 otherTZFormat = other.fTimeZoneFormat; 591 } 592 if (otherTZFormat) { 593 fTimeZoneFormat = new TimeZoneFormat(*otherTZFormat); 594 } 595 596 #if !UCONFIG_NO_BREAK_ITERATION 597 if (other.fCapitalizationBrkIter != nullptr) { 598 fCapitalizationBrkIter = (other.fCapitalizationBrkIter)->clone(); 599 } 600 #endif 601 602 if (fSharedNumberFormatters != nullptr) { 603 freeSharedNumberFormatters(fSharedNumberFormatters); 604 fSharedNumberFormatters = nullptr; 605 } 606 if (other.fSharedNumberFormatters != nullptr) { 607 fSharedNumberFormatters = allocSharedNumberFormatters(); 608 if (fSharedNumberFormatters) { 609 for (int32_t i = 0; i < UDAT_FIELD_COUNT; ++i) { 610 SharedObject::copyPtr( 611 other.fSharedNumberFormatters[i], 612 fSharedNumberFormatters[i]); 613 } 614 } 615 } 616 617 UErrorCode localStatus = U_ZERO_ERROR; 618 // SimpleNumberFormatter does not have a copy constructor. Furthermore, 619 // it references data from an internal field, fNumberFormatter, 620 // so we must rematerialize that reference after copying over the number formatter. 621 initSimpleNumberFormatter(localStatus); 622 return *this; 623 } 624 625 //---------------------------------------------------------------------- 626 627 SimpleDateFormat* 628 SimpleDateFormat::clone() const 629 { 630 return new SimpleDateFormat(*this); 631 } 632 633 //---------------------------------------------------------------------- 634 635 bool 636 SimpleDateFormat::operator==(const Format& other) const 637 { 638 if (DateFormat::operator==(other)) { 639 // The DateFormat::operator== check for fCapitalizationContext equality above 640 // is sufficient to check equality of all derived context-related data. 641 // DateFormat::operator== guarantees following cast is safe 642 SimpleDateFormat* that = (SimpleDateFormat*)&other; 643 return (fPattern == that->fPattern && 644 fSymbols != nullptr && // Check for pathological object 645 that->fSymbols != nullptr && // Check for pathological object 646 *fSymbols == *that->fSymbols && 647 fHaveDefaultCentury == that->fHaveDefaultCentury && 648 fDefaultCenturyStart == that->fDefaultCenturyStart); 649 } 650 return false; 651 } 652 653 //---------------------------------------------------------------------- 654 static const char16_t* timeSkeletons[4] = { 655 u"jmmsszzzz", // kFull 656 u"jmmssz", // kLong 657 u"jmmss", // kMedium 658 u"jmm", // kShort 659 }; 660 661 void SimpleDateFormat::construct(EStyle timeStyle, 662 EStyle dateStyle, 663 const Locale& locale, 664 UErrorCode& status) 665 { 666 // called by several constructors to load pattern data from the resources 667 if (U_FAILURE(status)) return; 668 669 // We will need the calendar to know what type of symbols to load. 670 initializeCalendar(nullptr, locale, status); 671 if (U_FAILURE(status)) return; 672 673 // Load date time patterns directly from resources. 674 const char* cType = fCalendar ? fCalendar->getType() : nullptr; 675 LocalUResourceBundlePointer bundle(ures_open(nullptr, locale.getBaseName(), &status)); 676 if (U_FAILURE(status)) return; 677 678 UBool cTypeIsGregorian = true; 679 LocalUResourceBundlePointer dateTimePatterns; 680 if (cType != nullptr && uprv_strcmp(cType, "gregorian") != 0) { 681 CharString resourcePath("calendar/", status); 682 resourcePath.append(cType, status).append("/DateTimePatterns", status); 683 dateTimePatterns.adoptInstead( 684 ures_getByKeyWithFallback(bundle.getAlias(), resourcePath.data(), 685 (UResourceBundle*)nullptr, &status)); 686 cTypeIsGregorian = false; 687 } 688 689 // Check for "gregorian" fallback. 690 if (cTypeIsGregorian || status == U_MISSING_RESOURCE_ERROR) { 691 status = U_ZERO_ERROR; 692 dateTimePatterns.adoptInstead( 693 ures_getByKeyWithFallback(bundle.getAlias(), 694 "calendar/gregorian/DateTimePatterns", 695 (UResourceBundle*)nullptr, &status)); 696 } 697 if (U_FAILURE(status)) return; 698 699 LocalUResourceBundlePointer currentBundle; 700 701 if (ures_getSize(dateTimePatterns.getAlias()) <= kDateTime) 702 { 703 status = U_INVALID_FORMAT_ERROR; 704 return; 705 } 706 707 setLocaleIDs(ures_getLocaleByType(dateTimePatterns.getAlias(), ULOC_VALID_LOCALE, &status), 708 ures_getLocaleByType(dateTimePatterns.getAlias(), ULOC_ACTUAL_LOCALE, &status)); 709 710 // create a symbols object from the locale 711 fSymbols = DateFormatSymbols::createForLocale(locale, status); 712 if (U_FAILURE(status)) return; 713 /* test for nullptr */ 714 if (fSymbols == nullptr) { 715 status = U_MEMORY_ALLOCATION_ERROR; 716 return; 717 } 718 719 const char16_t *resStr,*ovrStr; 720 int32_t resStrLen,ovrStrLen = 0; 721 fDateOverride.setToBogus(); 722 fTimeOverride.setToBogus(); 723 724 UnicodeString timePattern; 725 if (timeStyle >= kFull && timeStyle <= kShort) { 726 bool hasRgOrHcSubtag = false; 727 // also use DTPG if the locale has the "rg" or "hc" ("hours") subtag-- even if the overriding region 728 // or hour cycle is the same as the one we get by default, we go through the DateTimePatternGenerator 729 UErrorCode dummyErr1 = U_ZERO_ERROR, dummyErr2 = U_ZERO_ERROR; 730 if (locale.getKeywordValue("rg", nullptr, 0, dummyErr1) > 0 || locale.getKeywordValue("hours", nullptr, 0, dummyErr2) > 0) { 731 hasRgOrHcSubtag = true; 732 } 733 734 const char* baseLocID = locale.getBaseName(); 735 if (baseLocID != nullptr && uprv_strcmp(baseLocID,"und")!=0) { 736 UErrorCode useStatus = U_ZERO_ERROR; 737 Locale baseLoc(baseLocID); 738 Locale validLoc(getLocale(ULOC_VALID_LOCALE, useStatus)); 739 if (hasRgOrHcSubtag || (U_SUCCESS(useStatus) && validLoc!=baseLoc)) { 740 bool useDTPG = hasRgOrHcSubtag; 741 const char* baseReg = baseLoc.getCountry(); // empty string if no region 742 if ((baseReg != nullptr && baseReg[0] != 0 && 743 uprv_strncmp(baseReg,validLoc.getCountry(),ULOC_COUNTRY_CAPACITY)!=0) 744 || uprv_strncmp(baseLoc.getLanguage(),validLoc.getLanguage(),ULOC_LANG_CAPACITY)!=0) { 745 // use DTPG if 746 // * baseLoc has a region and validLoc does not have the same one (or has none), OR 747 // * validLoc has a different language code than baseLoc 748 // * the original locale has the rg or hc subtag 749 useDTPG = true; 750 } 751 if (useDTPG) { 752 // The standard time formats may have the wrong time cycle, because: 753 // the valid locale differs in important ways (region, language) from 754 // the base locale. 755 // We could *also* check whether they do actually have a mismatch with 756 // the time cycle preferences for the region, but that is a lot more 757 // work for little or no additional benefit, since just going ahead 758 // and always synthesizing the time format as per the following should 759 // create a locale-appropriate pattern with cycle that matches the 760 // region preferences anyway. 761 LocalPointer<DateTimePatternGenerator> dtpg(DateTimePatternGenerator::createInstanceNoStdPat(locale, useStatus)); 762 if (U_SUCCESS(useStatus)) { 763 UnicodeString timeSkeleton(true, timeSkeletons[timeStyle], -1); 764 timePattern = dtpg->getBestPattern(timeSkeleton, useStatus); 765 } 766 } 767 } 768 } 769 } 770 771 // if the pattern should include both date and time information, use the date/time 772 // pattern string as a guide to tell use how to glue together the appropriate date 773 // and time pattern strings. 774 if ((timeStyle != kNone) && (dateStyle != kNone)) 775 { 776 UnicodeString tempus1(timePattern); 777 if (tempus1.length() == 0) { 778 currentBundle.adoptInstead( 779 ures_getByIndex(dateTimePatterns.getAlias(), static_cast<int32_t>(timeStyle), nullptr, &status)); 780 if (U_FAILURE(status)) { 781 status = U_INVALID_FORMAT_ERROR; 782 return; 783 } 784 switch (ures_getType(currentBundle.getAlias())) { 785 case URES_STRING: { 786 resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status); 787 break; 788 } 789 case URES_ARRAY: { 790 resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status); 791 ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status); 792 fTimeOverride.setTo(true, ovrStr, ovrStrLen); 793 break; 794 } 795 default: { 796 status = U_INVALID_FORMAT_ERROR; 797 return; 798 } 799 } 800 801 tempus1.setTo(true, resStr, resStrLen); 802 } 803 804 currentBundle.adoptInstead( 805 ures_getByIndex(dateTimePatterns.getAlias(), static_cast<int32_t>(dateStyle), nullptr, &status)); 806 if (U_FAILURE(status)) { 807 status = U_INVALID_FORMAT_ERROR; 808 return; 809 } 810 switch (ures_getType(currentBundle.getAlias())) { 811 case URES_STRING: { 812 resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status); 813 break; 814 } 815 case URES_ARRAY: { 816 resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status); 817 ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status); 818 fDateOverride.setTo(true, ovrStr, ovrStrLen); 819 break; 820 } 821 default: { 822 status = U_INVALID_FORMAT_ERROR; 823 return; 824 } 825 } 826 827 UnicodeString tempus2(true, resStr, resStrLen); 828 829 // Currently, for compatibility with pre-CLDR-42 data, we default to the "atTime" 830 // combining patterns. Depending on guidance in CLDR 42 spec and on DisplayOptions, 831 // we may change this. 832 LocalUResourceBundlePointer dateAtTimePatterns; 833 if (!cTypeIsGregorian) { 834 CharString resourcePath("calendar/", status); 835 resourcePath.append(cType, status).append("/DateTimePatterns%atTime", status); 836 dateAtTimePatterns.adoptInstead( 837 ures_getByKeyWithFallback(bundle.getAlias(), resourcePath.data(), 838 nullptr, &status)); 839 } 840 if (cTypeIsGregorian || status == U_MISSING_RESOURCE_ERROR) { 841 status = U_ZERO_ERROR; 842 dateAtTimePatterns.adoptInstead( 843 ures_getByKeyWithFallback(bundle.getAlias(), 844 "calendar/gregorian/DateTimePatterns%atTime", 845 nullptr, &status)); 846 } 847 if (U_SUCCESS(status) && ures_getSize(dateAtTimePatterns.getAlias()) >= 4) { 848 resStr = ures_getStringByIndex(dateAtTimePatterns.getAlias(), dateStyle - kDateOffset, &resStrLen, &status); 849 } else { 850 status = U_ZERO_ERROR; 851 int32_t glueIndex = kDateTime; 852 int32_t patternsSize = ures_getSize(dateTimePatterns.getAlias()); 853 if (patternsSize >= (kDateTimeOffset + kShort + 1)) { 854 // Get proper date time format 855 glueIndex = static_cast<int32_t>(kDateTimeOffset + (dateStyle - kDateOffset)); 856 } 857 858 resStr = ures_getStringByIndex(dateTimePatterns.getAlias(), glueIndex, &resStrLen, &status); 859 } 860 SimpleFormatter(UnicodeString(true, resStr, resStrLen), 2, 2, status). 861 format(tempus1, tempus2, fPattern, status); 862 } 863 // if the pattern includes just time data or just date date, load the appropriate 864 // pattern string from the resources 865 // setTo() - see DateFormatSymbols::assignArray comments 866 else if (timeStyle != kNone) { 867 fPattern.setTo(timePattern); 868 if (fPattern.length() == 0) { 869 currentBundle.adoptInstead( 870 ures_getByIndex(dateTimePatterns.getAlias(), static_cast<int32_t>(timeStyle), nullptr, &status)); 871 if (U_FAILURE(status)) { 872 status = U_INVALID_FORMAT_ERROR; 873 return; 874 } 875 switch (ures_getType(currentBundle.getAlias())) { 876 case URES_STRING: { 877 resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status); 878 break; 879 } 880 case URES_ARRAY: { 881 resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status); 882 ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status); 883 fDateOverride.setTo(true, ovrStr, ovrStrLen); 884 break; 885 } 886 default: { 887 status = U_INVALID_FORMAT_ERROR; 888 return; 889 } 890 } 891 fPattern.setTo(true, resStr, resStrLen); 892 } 893 } 894 else if (dateStyle != kNone) { 895 currentBundle.adoptInstead( 896 ures_getByIndex(dateTimePatterns.getAlias(), static_cast<int32_t>(dateStyle), nullptr, &status)); 897 if (U_FAILURE(status)) { 898 status = U_INVALID_FORMAT_ERROR; 899 return; 900 } 901 switch (ures_getType(currentBundle.getAlias())) { 902 case URES_STRING: { 903 resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status); 904 break; 905 } 906 case URES_ARRAY: { 907 resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status); 908 ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status); 909 fDateOverride.setTo(true, ovrStr, ovrStrLen); 910 break; 911 } 912 default: { 913 status = U_INVALID_FORMAT_ERROR; 914 return; 915 } 916 } 917 fPattern.setTo(true, resStr, resStrLen); 918 } 919 920 // and if it includes _neither_, that's an error 921 else 922 status = U_INVALID_FORMAT_ERROR; 923 924 // finally, finish initializing by creating a Calendar and a NumberFormat 925 initialize(locale, status); 926 } 927 928 //---------------------------------------------------------------------- 929 930 Calendar* 931 SimpleDateFormat::initializeCalendar(TimeZone* adoptZone, const Locale& locale, UErrorCode& status) 932 { 933 if(!U_FAILURE(status)) { 934 fCalendar = Calendar::createInstance( 935 adoptZone ? adoptZone : TimeZone::forLocaleOrDefault(locale), locale, status); 936 } 937 return fCalendar; 938 } 939 940 void 941 SimpleDateFormat::initialize(const Locale& locale, 942 UErrorCode& status) 943 { 944 if (U_FAILURE(status)) return; 945 946 parsePattern(); // Need this before initNumberFormatters(), to set fHasHanYearChar 947 948 // Simple-minded hack to force Gannen year numbering for ja@calendar=japanese 949 // if format is non-numeric (includes 年) and fDateOverride is not already specified. 950 // Now this does get updated if applyPattern subsequently changes the pattern type. 951 if (fDateOverride.isBogus() && fHasHanYearChar && 952 fCalendar != nullptr && 953 typeid(*fCalendar) == typeid(JapaneseCalendar) && 954 uprv_strcmp(fLocale.getLanguage(),"ja") == 0) { 955 fDateOverride.setTo(u"y=jpanyear", -1); 956 } 957 958 // We don't need to check that the row count is >= 1, since all 2d arrays have at 959 // least one row 960 fNumberFormat = NumberFormat::createInstance(locale, status); 961 if (fNumberFormat != nullptr && U_SUCCESS(status)) 962 { 963 fixNumberFormatForDates(*fNumberFormat); 964 //fNumberFormat->setLenient(true); // Java uses a custom DateNumberFormat to format/parse 965 966 initNumberFormatters(locale, status); 967 initSimpleNumberFormatter(status); 968 969 } 970 else if (U_SUCCESS(status)) 971 { 972 status = U_MISSING_RESOURCE_ERROR; 973 } 974 } 975 976 /* Initialize the fields we use to disambiguate ambiguous years. Separate 977 * so we can call it from readObject(). 978 */ 979 void SimpleDateFormat::initializeDefaultCentury() 980 { 981 if(fCalendar) { 982 fHaveDefaultCentury = fCalendar->haveDefaultCentury(); 983 if(fHaveDefaultCentury) { 984 fDefaultCenturyStart = fCalendar->defaultCenturyStart(); 985 fDefaultCenturyStartYear = fCalendar->defaultCenturyStartYear(); 986 } else { 987 fDefaultCenturyStart = DBL_MIN; 988 fDefaultCenturyStartYear = -1; 989 } 990 } 991 } 992 993 /* 994 * Initialize the boolean attributes. Separate so we can call it from all constructors. 995 */ 996 void SimpleDateFormat::initializeBooleanAttributes() 997 { 998 UErrorCode status = U_ZERO_ERROR; 999 1000 setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status); 1001 setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status); 1002 setBooleanAttribute(UDAT_PARSE_PARTIAL_LITERAL_MATCH, true, status); 1003 setBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, true, status); 1004 } 1005 1006 /* Define one-century window into which to disambiguate dates using 1007 * two-digit years. Make public in JDK 1.2. 1008 */ 1009 void SimpleDateFormat::parseAmbiguousDatesAsAfter(UDate startDate, UErrorCode& status) 1010 { 1011 if(U_FAILURE(status)) { 1012 return; 1013 } 1014 if(!fCalendar) { 1015 status = U_ILLEGAL_ARGUMENT_ERROR; 1016 return; 1017 } 1018 1019 fCalendar->setTime(startDate, status); 1020 if(U_SUCCESS(status)) { 1021 fHaveDefaultCentury = true; 1022 fDefaultCenturyStart = startDate; 1023 fDefaultCenturyStartYear = fCalendar->get(UCAL_YEAR, status); 1024 } 1025 } 1026 1027 //---------------------------------------------------------------------- 1028 1029 UnicodeString& 1030 SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo, FieldPosition& pos) const 1031 { 1032 UErrorCode status = U_ZERO_ERROR; 1033 FieldPositionOnlyHandler handler(pos); 1034 return _format(cal, appendTo, handler, status); 1035 } 1036 1037 //---------------------------------------------------------------------- 1038 1039 UnicodeString& 1040 SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo, 1041 FieldPositionIterator* posIter, UErrorCode& status) const 1042 { 1043 FieldPositionIteratorHandler handler(posIter, status); 1044 return _format(cal, appendTo, handler, status); 1045 } 1046 1047 //---------------------------------------------------------------------- 1048 1049 UnicodeString& 1050 SimpleDateFormat::_format(Calendar& cal, UnicodeString& appendTo, 1051 FieldPositionHandler& handler, UErrorCode& status) const 1052 { 1053 if ( U_FAILURE(status) ) { 1054 return appendTo; 1055 } 1056 Calendar* workCal = &cal; 1057 Calendar* calClone = nullptr; 1058 if (&cal != fCalendar && typeid(cal) != typeid(*fCalendar)) { 1059 // Different calendar type 1060 // We use the time and time zone from the input calendar, but 1061 // do not use the input calendar for field calculation. 1062 calClone = fCalendar->clone(); 1063 if (calClone != nullptr) { 1064 UDate t = cal.getTime(status); 1065 calClone->setTime(t, status); 1066 calClone->setTimeZone(cal.getTimeZone()); 1067 workCal = calClone; 1068 } else { 1069 status = U_MEMORY_ALLOCATION_ERROR; 1070 return appendTo; 1071 } 1072 } 1073 1074 UBool inQuote = false; 1075 char16_t prevCh = 0; 1076 int32_t count = 0; 1077 int32_t fieldNum = 0; 1078 UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status); 1079 1080 // loop through the pattern string character by character 1081 int32_t patternLength = fPattern.length(); 1082 for (int32_t i = 0; i < patternLength && U_SUCCESS(status); ++i) { 1083 char16_t ch = fPattern[i]; 1084 1085 // Use subFormat() to format a repeated pattern character 1086 // when a different pattern or non-pattern character is seen 1087 if (ch != prevCh && count > 0) { 1088 subFormat(appendTo, prevCh, count, capitalizationContext, fieldNum++, 1089 prevCh, handler, *workCal, status); 1090 count = 0; 1091 } 1092 if (ch == QUOTE) { 1093 // Consecutive single quotes are a single quote literal, 1094 // either outside of quotes or between quotes 1095 if ((i+1) < patternLength && fPattern[i+1] == QUOTE) { 1096 appendTo += QUOTE; 1097 ++i; 1098 } else { 1099 inQuote = ! inQuote; 1100 } 1101 } 1102 else if (!inQuote && isSyntaxChar(ch)) { 1103 // ch is a date-time pattern character to be interpreted 1104 // by subFormat(); count the number of times it is repeated 1105 prevCh = ch; 1106 ++count; 1107 } 1108 else { 1109 // Append quoted characters and unquoted non-pattern characters 1110 appendTo += ch; 1111 } 1112 } 1113 1114 // Format the last item in the pattern, if any 1115 if (count > 0) { 1116 subFormat(appendTo, prevCh, count, capitalizationContext, fieldNum++, 1117 prevCh, handler, *workCal, status); 1118 } 1119 1120 delete calClone; 1121 1122 return appendTo; 1123 } 1124 1125 //---------------------------------------------------------------------- 1126 1127 /* Map calendar field into calendar field level. 1128 * the larger the level, the smaller the field unit. 1129 * For example, UCAL_ERA level is 0, UCAL_YEAR level is 10, 1130 * UCAL_MONTH level is 20. 1131 * NOTE: if new fields adds in, the table needs to update. 1132 */ 1133 const int32_t 1134 SimpleDateFormat::fgCalendarFieldToLevel[] = 1135 { 1136 /*GyM*/ 0, 10, 20, 1137 /*wW*/ 20, 30, 1138 /*dDEF*/ 30, 20, 30, 30, 1139 /*ahHm*/ 40, 50, 50, 60, 1140 /*sS*/ 70, 80, 1141 /*z?Y*/ 0, 0, 10, 1142 /*eug*/ 30, 10, 0, 1143 /*A?.*/ 40, 0, 0 1144 }; 1145 1146 int32_t SimpleDateFormat::getLevelFromChar(char16_t ch) { 1147 // Map date field LETTER into calendar field level. 1148 // the larger the level, the smaller the field unit. 1149 // NOTE: if new fields adds in, the table needs to update. 1150 static const int32_t mapCharToLevel[] = { 1151 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1152 // 1153 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1154 // ! " # $ % & ' ( ) * + , - . / 1155 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1156 #if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR 1157 // 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 1158 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 1159 #else 1160 // 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 1161 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1162 #endif 1163 // @ A B C D E F G H I J K L M N O 1164 -1, 40, -1, -1, 20, 30, 30, 0, 50, -1, -1, 50, 20, 20, -1, 0, 1165 // P Q R S T U V W X Y Z [ \ ] ^ _ 1166 -1, 20, -1, 80, -1, 10, 0, 30, 0, 10, 0, -1, -1, -1, -1, -1, 1167 // ` a b c d e f g h i j k l m n o 1168 -1, 40, -1, 30, 30, 30, -1, 0, 50, -1, -1, 50, 0, 60, -1, -1, 1169 // p q r s t u v w x y z { | } ~ 1170 -1, 20, 10, 70, -1, 10, 0, 20, 0, 10, 0, -1, -1, -1, -1, -1 1171 }; 1172 1173 return ch < UPRV_LENGTHOF(mapCharToLevel) ? mapCharToLevel[ch] : -1; 1174 } 1175 1176 UBool SimpleDateFormat::isSyntaxChar(char16_t ch) { 1177 static const UBool mapCharToIsSyntax[] = { 1178 // 1179 false, false, false, false, false, false, false, false, 1180 // 1181 false, false, false, false, false, false, false, false, 1182 // 1183 false, false, false, false, false, false, false, false, 1184 // 1185 false, false, false, false, false, false, false, false, 1186 // ! " # $ % & ' 1187 false, false, false, false, false, false, false, false, 1188 // ( ) * + , - . / 1189 false, false, false, false, false, false, false, false, 1190 // 0 1 2 3 4 5 6 7 1191 false, false, false, false, false, false, false, false, 1192 #if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR 1193 // 8 9 : ; < = > ? 1194 false, false, true, false, false, false, false, false, 1195 #else 1196 // 8 9 : ; < = > ? 1197 false, false, false, false, false, false, false, false, 1198 #endif 1199 // @ A B C D E F G 1200 false, true, true, true, true, true, true, true, 1201 // H I J K L M N O 1202 true, true, true, true, true, true, true, true, 1203 // P Q R S T U V W 1204 true, true, true, true, true, true, true, true, 1205 // X Y Z [ \ ] ^ _ 1206 true, true, true, false, false, false, false, false, 1207 // ` a b c d e f g 1208 false, true, true, true, true, true, true, true, 1209 // h i j k l m n o 1210 true, true, true, true, true, true, true, true, 1211 // p q r s t u v w 1212 true, true, true, true, true, true, true, true, 1213 // x y z { | } ~ 1214 true, true, true, false, false, false, false, false 1215 }; 1216 1217 return ch < UPRV_LENGTHOF(mapCharToIsSyntax) ? mapCharToIsSyntax[ch] : false; 1218 } 1219 1220 // Map index into pattern character string to Calendar field number. 1221 const UCalendarDateFields 1222 SimpleDateFormat::fgPatternIndexToCalendarField[] = 1223 { 1224 /*GyM*/ UCAL_ERA, UCAL_YEAR, UCAL_MONTH, 1225 /*dkH*/ UCAL_DATE, UCAL_HOUR_OF_DAY, UCAL_HOUR_OF_DAY, 1226 /*msS*/ UCAL_MINUTE, UCAL_SECOND, UCAL_MILLISECOND, 1227 /*EDF*/ UCAL_DAY_OF_WEEK, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK_IN_MONTH, 1228 /*wWa*/ UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_AM_PM, 1229 /*hKz*/ UCAL_HOUR, UCAL_HOUR, UCAL_ZONE_OFFSET, 1230 /*Yeu*/ UCAL_YEAR_WOY, UCAL_DOW_LOCAL, UCAL_EXTENDED_YEAR, 1231 /*gAZ*/ UCAL_JULIAN_DAY, UCAL_MILLISECONDS_IN_DAY, UCAL_ZONE_OFFSET, 1232 /*v*/ UCAL_ZONE_OFFSET, 1233 /*c*/ UCAL_DOW_LOCAL, 1234 /*L*/ UCAL_MONTH, 1235 /*Q*/ UCAL_MONTH, 1236 /*q*/ UCAL_MONTH, 1237 /*V*/ UCAL_ZONE_OFFSET, 1238 /*U*/ UCAL_YEAR, 1239 /*O*/ UCAL_ZONE_OFFSET, 1240 /*Xx*/ UCAL_ZONE_OFFSET, UCAL_ZONE_OFFSET, 1241 /*r*/ UCAL_EXTENDED_YEAR, 1242 /*bB*/ UCAL_FIELD_COUNT, UCAL_FIELD_COUNT, // no mappings to calendar fields 1243 #if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR 1244 /*:*/ UCAL_FIELD_COUNT, /* => no useful mapping to any calendar field */ 1245 #else 1246 /*no pattern char for UDAT_TIME_SEPARATOR_FIELD*/ UCAL_FIELD_COUNT, /* => no useful mapping to any calendar field */ 1247 #endif 1248 }; 1249 1250 // Map index into pattern character string to DateFormat field number 1251 const UDateFormatField 1252 SimpleDateFormat::fgPatternIndexToDateFormatField[] = { 1253 /*GyM*/ UDAT_ERA_FIELD, UDAT_YEAR_FIELD, UDAT_MONTH_FIELD, 1254 /*dkH*/ UDAT_DATE_FIELD, UDAT_HOUR_OF_DAY1_FIELD, UDAT_HOUR_OF_DAY0_FIELD, 1255 /*msS*/ UDAT_MINUTE_FIELD, UDAT_SECOND_FIELD, UDAT_FRACTIONAL_SECOND_FIELD, 1256 /*EDF*/ UDAT_DAY_OF_WEEK_FIELD, UDAT_DAY_OF_YEAR_FIELD, UDAT_DAY_OF_WEEK_IN_MONTH_FIELD, 1257 /*wWa*/ UDAT_WEEK_OF_YEAR_FIELD, UDAT_WEEK_OF_MONTH_FIELD, UDAT_AM_PM_FIELD, 1258 /*hKz*/ UDAT_HOUR1_FIELD, UDAT_HOUR0_FIELD, UDAT_TIMEZONE_FIELD, 1259 /*Yeu*/ UDAT_YEAR_WOY_FIELD, UDAT_DOW_LOCAL_FIELD, UDAT_EXTENDED_YEAR_FIELD, 1260 /*gAZ*/ UDAT_JULIAN_DAY_FIELD, UDAT_MILLISECONDS_IN_DAY_FIELD, UDAT_TIMEZONE_RFC_FIELD, 1261 /*v*/ UDAT_TIMEZONE_GENERIC_FIELD, 1262 /*c*/ UDAT_STANDALONE_DAY_FIELD, 1263 /*L*/ UDAT_STANDALONE_MONTH_FIELD, 1264 /*Q*/ UDAT_QUARTER_FIELD, 1265 /*q*/ UDAT_STANDALONE_QUARTER_FIELD, 1266 /*V*/ UDAT_TIMEZONE_SPECIAL_FIELD, 1267 /*U*/ UDAT_YEAR_NAME_FIELD, 1268 /*O*/ UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD, 1269 /*Xx*/ UDAT_TIMEZONE_ISO_FIELD, UDAT_TIMEZONE_ISO_LOCAL_FIELD, 1270 /*r*/ UDAT_RELATED_YEAR_FIELD, 1271 /*bB*/ UDAT_AM_PM_MIDNIGHT_NOON_FIELD, UDAT_FLEXIBLE_DAY_PERIOD_FIELD, 1272 #if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR 1273 /*:*/ UDAT_TIME_SEPARATOR_FIELD, 1274 #else 1275 /*no pattern char for UDAT_TIME_SEPARATOR_FIELD*/ UDAT_TIME_SEPARATOR_FIELD, 1276 #endif 1277 }; 1278 1279 //---------------------------------------------------------------------- 1280 1281 /** 1282 * Append symbols[value] to dst. Make sure the array index is not out 1283 * of bounds. 1284 */ 1285 static inline void 1286 _appendSymbol(UnicodeString& dst, 1287 int32_t value, 1288 const UnicodeString* symbols, 1289 int32_t symbolsCount) { 1290 U_ASSERT(0 <= value && value < symbolsCount); 1291 if (0 <= value && value < symbolsCount) { 1292 dst += symbols[value]; 1293 } 1294 } 1295 1296 static inline void 1297 _appendSymbolWithMonthPattern(UnicodeString& dst, int32_t value, const UnicodeString* symbols, int32_t symbolsCount, 1298 const UnicodeString* monthPattern, UErrorCode& status) { 1299 U_ASSERT(0 <= value && value < symbolsCount); 1300 if (0 <= value && value < symbolsCount) { 1301 if (monthPattern == nullptr) { 1302 dst += symbols[value]; 1303 } else { 1304 SimpleFormatter(*monthPattern, 1, 1, status).format(symbols[value], dst, status); 1305 } 1306 } 1307 } 1308 1309 //---------------------------------------------------------------------- 1310 1311 void 1312 SimpleDateFormat::initSimpleNumberFormatter(UErrorCode &status) { 1313 if (U_FAILURE(status)) { 1314 return; 1315 } 1316 const auto* df = dynamic_cast<const DecimalFormat*>(fNumberFormat); 1317 if (df == nullptr) { 1318 return; 1319 } 1320 const DecimalFormatSymbols* syms = df->getDecimalFormatSymbols(); 1321 if (syms == nullptr) { 1322 return; 1323 } 1324 fSimpleNumberFormatter = new number::SimpleNumberFormatter( 1325 number::SimpleNumberFormatter::forLocaleAndSymbolsAndGroupingStrategy( 1326 fLocale, *syms, UNUM_GROUPING_OFF, status 1327 ) 1328 ); 1329 if (fSimpleNumberFormatter == nullptr) { 1330 status = U_MEMORY_ALLOCATION_ERROR; 1331 } 1332 } 1333 1334 void 1335 SimpleDateFormat::initNumberFormatters(const Locale &locale,UErrorCode &status) { 1336 if (U_FAILURE(status)) { 1337 return; 1338 } 1339 if ( fDateOverride.isBogus() && fTimeOverride.isBogus() ) { 1340 return; 1341 } 1342 umtx_lock(&LOCK); 1343 if (fSharedNumberFormatters == nullptr) { 1344 fSharedNumberFormatters = allocSharedNumberFormatters(); 1345 if (fSharedNumberFormatters == nullptr) { 1346 status = U_MEMORY_ALLOCATION_ERROR; 1347 } 1348 } 1349 umtx_unlock(&LOCK); 1350 1351 if (U_FAILURE(status)) { 1352 return; 1353 } 1354 1355 processOverrideString(locale,fDateOverride,kOvrStrDate,status); 1356 processOverrideString(locale,fTimeOverride,kOvrStrTime,status); 1357 } 1358 1359 void 1360 SimpleDateFormat::processOverrideString(const Locale &locale, const UnicodeString &str, int8_t type, UErrorCode &status) { 1361 if (str.isBogus() || U_FAILURE(status)) { 1362 return; 1363 } 1364 1365 int32_t start = 0; 1366 int32_t len; 1367 UnicodeString nsName; 1368 UnicodeString ovrField; 1369 UBool moreToProcess = true; 1370 NSOverride *overrideList = nullptr; 1371 1372 while (moreToProcess) { 1373 int32_t delimiterPosition = str.indexOf(static_cast<char16_t>(ULOC_KEYWORD_ITEM_SEPARATOR_UNICODE), start); 1374 if (delimiterPosition == -1) { 1375 moreToProcess = false; 1376 len = str.length() - start; 1377 } else { 1378 len = delimiterPosition - start; 1379 } 1380 UnicodeString currentString(str,start,len); 1381 int32_t equalSignPosition = currentString.indexOf(static_cast<char16_t>(ULOC_KEYWORD_ASSIGN_UNICODE), 0); 1382 if (equalSignPosition == -1) { // Simple override string such as "hebrew" 1383 nsName.setTo(currentString); 1384 ovrField.setToBogus(); 1385 } else { // Field specific override string such as "y=hebrew" 1386 nsName.setTo(currentString,equalSignPosition+1); 1387 ovrField.setTo(currentString,0,1); // We just need the first character. 1388 } 1389 1390 int32_t nsNameHash = nsName.hashCode(); 1391 // See if the numbering system is in the override list, if not, then add it. 1392 NSOverride *curr = overrideList; 1393 const SharedNumberFormat *snf = nullptr; 1394 UBool found = false; 1395 while ( curr && !found ) { 1396 if ( curr->hash == nsNameHash ) { 1397 snf = curr->snf; 1398 found = true; 1399 } 1400 curr = curr->next; 1401 } 1402 1403 if (!found) { 1404 LocalPointer<NSOverride> cur(new NSOverride); 1405 if (!cur.isNull()) { 1406 char kw[ULOC_KEYWORD_AND_VALUES_CAPACITY]; 1407 uprv_strcpy(kw,"numbers="); 1408 nsName.extract(0,len,kw+8,ULOC_KEYWORD_AND_VALUES_CAPACITY-8,US_INV); 1409 1410 Locale ovrLoc(locale.getLanguage(),locale.getCountry(),locale.getVariant(),kw); 1411 cur->hash = nsNameHash; 1412 cur->next = overrideList; 1413 SharedObject::copyPtr( 1414 createSharedNumberFormat(ovrLoc, status), cur->snf); 1415 if (U_FAILURE(status)) { 1416 if (overrideList) { 1417 overrideList->free(); 1418 } 1419 return; 1420 } 1421 snf = cur->snf; 1422 overrideList = cur.orphan(); 1423 } else { 1424 status = U_MEMORY_ALLOCATION_ERROR; 1425 if (overrideList) { 1426 overrideList->free(); 1427 } 1428 return; 1429 } 1430 } 1431 1432 // Now that we have an appropriate number formatter, fill in the appropriate spaces in the 1433 // number formatters table. 1434 if (ovrField.isBogus()) { 1435 switch (type) { 1436 case kOvrStrDate: 1437 case kOvrStrBoth: { 1438 for ( int8_t i=0 ; i<kDateFieldsCount; i++ ) { 1439 SharedObject::copyPtr(snf, fSharedNumberFormatters[kDateFields[i]]); 1440 } 1441 if (type==kOvrStrDate) { 1442 break; 1443 } 1444 U_FALLTHROUGH; 1445 } 1446 case kOvrStrTime : { 1447 for ( int8_t i=0 ; i<kTimeFieldsCount; i++ ) { 1448 SharedObject::copyPtr(snf, fSharedNumberFormatters[kTimeFields[i]]); 1449 } 1450 break; 1451 } 1452 } 1453 } else { 1454 // if the pattern character is unrecognized, signal an error and bail out 1455 UDateFormatField patternCharIndex = 1456 DateFormatSymbols::getPatternCharIndex(ovrField.charAt(0)); 1457 if (patternCharIndex == UDAT_FIELD_COUNT) { 1458 status = U_INVALID_FORMAT_ERROR; 1459 if (overrideList) { 1460 overrideList->free(); 1461 } 1462 return; 1463 } 1464 SharedObject::copyPtr(snf, fSharedNumberFormatters[patternCharIndex]); 1465 } 1466 1467 start = delimiterPosition + 1; 1468 } 1469 if (overrideList) { 1470 overrideList->free(); 1471 } 1472 } 1473 1474 //--------------------------------------------------------------------- 1475 void 1476 SimpleDateFormat::subFormat(UnicodeString &appendTo, 1477 char16_t ch, 1478 int32_t count, 1479 UDisplayContext capitalizationContext, 1480 int32_t fieldNum, 1481 char16_t fieldToOutput, 1482 FieldPositionHandler& handler, 1483 Calendar& cal, 1484 UErrorCode& status) const 1485 { 1486 static const int32_t maxIntCount = 10; 1487 static const UnicodeString hebr(u"hebr"); 1488 1489 if (U_FAILURE(status)) { 1490 return; 1491 } 1492 1493 // this function gets called by format() to produce the appropriate substitution 1494 // text for an individual pattern symbol (e.g., "HH" or "yyyy") 1495 1496 UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(ch); 1497 int32_t beginOffset = appendTo.length(); 1498 DateFormatSymbols::ECapitalizationContextUsageType capContextUsageType = DateFormatSymbols::kCapContextUsageOther; 1499 1500 // if the pattern character is unrecognized, signal an error and dump out 1501 if (patternCharIndex == UDAT_FIELD_COUNT) 1502 { 1503 if (ch != 0x6C) { // pattern char 'l' (SMALL LETTER L) just gets ignored 1504 status = U_INVALID_FORMAT_ERROR; 1505 } 1506 return; 1507 } 1508 1509 UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex]; 1510 int32_t value = 0; 1511 // Don't get value unless it is useful 1512 if (field < UCAL_FIELD_COUNT) { 1513 value = (patternCharIndex != UDAT_RELATED_YEAR_FIELD)? cal.get(field, status): cal.getRelatedYear(status); 1514 if (U_FAILURE(status)) { 1515 return; 1516 } 1517 } 1518 1519 const NumberFormat *currentNumberFormat = getNumberFormatByIndex(patternCharIndex); 1520 if (currentNumberFormat == nullptr) { 1521 status = U_INTERNAL_PROGRAM_ERROR; 1522 return; 1523 } 1524 1525 switch (patternCharIndex) { 1526 1527 // for any "G" symbol, write out the appropriate era string 1528 // "GGGG" is wide era name, "GGGGG" is narrow era name, anything else is abbreviated name 1529 case UDAT_ERA_FIELD: 1530 { 1531 const char* type = cal.getType(); 1532 if (strcmp(type, "chinese") == 0 || 1533 strcmp(type, "dangi") == 0) { 1534 zeroPaddingNumber(currentNumberFormat,appendTo, value, 1, 9); // as in ICU4J 1535 } else { 1536 if (count == 5) { 1537 _appendSymbol(appendTo, value, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount); 1538 capContextUsageType = DateFormatSymbols::kCapContextUsageEraNarrow; 1539 } else if (count == 4) { 1540 _appendSymbol(appendTo, value, fSymbols->fEraNames, fSymbols->fEraNamesCount); 1541 capContextUsageType = DateFormatSymbols::kCapContextUsageEraWide; 1542 } else { 1543 _appendSymbol(appendTo, value, fSymbols->fEras, fSymbols->fErasCount); 1544 capContextUsageType = DateFormatSymbols::kCapContextUsageEraAbbrev; 1545 } 1546 } 1547 } 1548 break; 1549 1550 case UDAT_YEAR_NAME_FIELD: 1551 if (fSymbols->fShortYearNames != nullptr && value <= fSymbols->fShortYearNamesCount) { 1552 // the Calendar YEAR field runs 1 through 60 for cyclic years 1553 _appendSymbol(appendTo, value - 1, fSymbols->fShortYearNames, fSymbols->fShortYearNamesCount); 1554 break; 1555 } 1556 // else fall through to numeric year handling, do not break here 1557 U_FALLTHROUGH; 1558 1559 // OLD: for "yyyy", write out the whole year; for "yy", write out the last 2 digits 1560 // NEW: UTS#35: 1561 //Year y yy yyy yyyy yyyyy 1562 //AD 1 1 01 001 0001 00001 1563 //AD 12 12 12 012 0012 00012 1564 //AD 123 123 23 123 0123 00123 1565 //AD 1234 1234 34 1234 1234 01234 1566 //AD 12345 12345 45 12345 12345 12345 1567 case UDAT_YEAR_FIELD: 1568 case UDAT_YEAR_WOY_FIELD: 1569 if (fDateOverride.compare(hebr)==0 && value>HEBREW_CAL_CUR_MILLENIUM_START_YEAR && value<HEBREW_CAL_CUR_MILLENIUM_END_YEAR) { 1570 value-=HEBREW_CAL_CUR_MILLENIUM_START_YEAR; 1571 } 1572 if(count == 2) 1573 zeroPaddingNumber(currentNumberFormat, appendTo, value, 2, 2); 1574 else 1575 zeroPaddingNumber(currentNumberFormat, appendTo, value, count, maxIntCount); 1576 break; 1577 1578 // for "MMMM"/"LLLL", write out the whole month name, for "MMM"/"LLL", write out the month 1579 // abbreviation, for "M"/"L" or "MM"/"LL", write out the month as a number with the 1580 // appropriate number of digits 1581 // for "MMMMM"/"LLLLL", use the narrow form 1582 case UDAT_MONTH_FIELD: 1583 case UDAT_STANDALONE_MONTH_FIELD: 1584 if (typeid(cal) == typeid(HebrewCalendar)) { 1585 if (HebrewCalendar::isLeapYear(cal.get(UCAL_YEAR,status)) && value == 6 && count >= 3 ) 1586 value = 13; // Show alternate form for Adar II in leap years in Hebrew calendar. 1587 if (!HebrewCalendar::isLeapYear(cal.get(UCAL_YEAR,status)) && value >= 6 && count < 3 ) 1588 value--; // Adjust the month number down 1 in Hebrew non-leap years, i.e. Adar is 6, not 7. 1589 } 1590 { 1591 int32_t isLeapMonth = (fSymbols->fLeapMonthPatterns != nullptr && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount)? 1592 cal.get(UCAL_IS_LEAP_MONTH, status): 0; 1593 // should consolidate the next section by using arrays of pointers & counts for the right symbols... 1594 if (count == 5) { 1595 if (patternCharIndex == UDAT_MONTH_FIELD) { 1596 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fNarrowMonths, fSymbols->fNarrowMonthsCount, 1597 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatNarrow]): nullptr, status); 1598 } else { 1599 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fStandaloneNarrowMonths, fSymbols->fStandaloneNarrowMonthsCount, 1600 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneNarrow]): nullptr, status); 1601 } 1602 capContextUsageType = DateFormatSymbols::kCapContextUsageMonthNarrow; 1603 } else if (count == 4) { 1604 if (patternCharIndex == UDAT_MONTH_FIELD) { 1605 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fMonths, fSymbols->fMonthsCount, 1606 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatWide]): nullptr, status); 1607 capContextUsageType = DateFormatSymbols::kCapContextUsageMonthFormat; 1608 } else { 1609 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, 1610 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneWide]): nullptr, status); 1611 capContextUsageType = DateFormatSymbols::kCapContextUsageMonthStandalone; 1612 } 1613 } else if (count == 3) { 1614 if (patternCharIndex == UDAT_MONTH_FIELD) { 1615 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fShortMonths, fSymbols->fShortMonthsCount, 1616 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatAbbrev]): nullptr, status); 1617 capContextUsageType = DateFormatSymbols::kCapContextUsageMonthFormat; 1618 } else { 1619 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, 1620 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneAbbrev]): nullptr, status); 1621 capContextUsageType = DateFormatSymbols::kCapContextUsageMonthStandalone; 1622 } 1623 } else { 1624 UnicodeString monthNumber; 1625 zeroPaddingNumber(currentNumberFormat,monthNumber, value + 1, count, maxIntCount); 1626 _appendSymbolWithMonthPattern(appendTo, 0, &monthNumber, 1, 1627 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternNumeric]): nullptr, status); 1628 } 1629 } 1630 break; 1631 1632 // for "k" and "kk", write out the hour, adjusting midnight to appear as "24" 1633 case UDAT_HOUR_OF_DAY1_FIELD: 1634 if (value == 0) 1635 zeroPaddingNumber(currentNumberFormat,appendTo, cal.getMaximum(UCAL_HOUR_OF_DAY) + 1, count, maxIntCount); 1636 else 1637 zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount); 1638 break; 1639 1640 case UDAT_FRACTIONAL_SECOND_FIELD: 1641 // Fractional seconds left-justify 1642 { 1643 int32_t minDigits = (count > 3) ? 3 : count; 1644 if (count == 1) { 1645 value /= 100; 1646 } else if (count == 2) { 1647 value /= 10; 1648 } 1649 zeroPaddingNumber(currentNumberFormat, appendTo, value, minDigits, maxIntCount); 1650 if (count > 3) { 1651 zeroPaddingNumber(currentNumberFormat, appendTo, 0, count - 3, maxIntCount); 1652 } 1653 } 1654 break; 1655 1656 // for "ee" or "e", use local numeric day-of-the-week 1657 // for "EEEEEE" or "eeeeee", write out the short day-of-the-week name 1658 // for "EEEEE" or "eeeee", write out the narrow day-of-the-week name 1659 // for "EEEE" or "eeee", write out the wide day-of-the-week name 1660 // for "EEE" or "EE" or "E" or "eee", write out the abbreviated day-of-the-week name 1661 case UDAT_DOW_LOCAL_FIELD: 1662 if ( count < 3 ) { 1663 zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount); 1664 break; 1665 } 1666 // fall through to EEEEE-EEE handling, but for that we don't want local day-of-week, 1667 // we want standard day-of-week, so first fix value to work for EEEEE-EEE. 1668 value = cal.get(UCAL_DAY_OF_WEEK, status); 1669 if (U_FAILURE(status)) { 1670 return; 1671 } 1672 // fall through, do not break here 1673 U_FALLTHROUGH; 1674 case UDAT_DAY_OF_WEEK_FIELD: 1675 if (count == 5) { 1676 _appendSymbol(appendTo, value, fSymbols->fNarrowWeekdays, 1677 fSymbols->fNarrowWeekdaysCount); 1678 capContextUsageType = DateFormatSymbols::kCapContextUsageDayNarrow; 1679 } else if (count == 4) { 1680 _appendSymbol(appendTo, value, fSymbols->fWeekdays, 1681 fSymbols->fWeekdaysCount); 1682 capContextUsageType = DateFormatSymbols::kCapContextUsageDayFormat; 1683 } else if (count == 6) { 1684 _appendSymbol(appendTo, value, fSymbols->fShorterWeekdays, 1685 fSymbols->fShorterWeekdaysCount); 1686 capContextUsageType = DateFormatSymbols::kCapContextUsageDayFormat; 1687 } else { 1688 _appendSymbol(appendTo, value, fSymbols->fShortWeekdays, 1689 fSymbols->fShortWeekdaysCount); 1690 capContextUsageType = DateFormatSymbols::kCapContextUsageDayFormat; 1691 } 1692 break; 1693 1694 // for "ccc", write out the abbreviated day-of-the-week name 1695 // for "cccc", write out the wide day-of-the-week name 1696 // for "ccccc", use the narrow day-of-the-week name 1697 // for "ccccc", use the short day-of-the-week name 1698 case UDAT_STANDALONE_DAY_FIELD: 1699 if ( count < 3 ) { 1700 zeroPaddingNumber(currentNumberFormat,appendTo, value, 1, maxIntCount); 1701 break; 1702 } 1703 // fall through to alpha DOW handling, but for that we don't want local day-of-week, 1704 // we want standard day-of-week, so first fix value. 1705 value = cal.get(UCAL_DAY_OF_WEEK, status); 1706 if (U_FAILURE(status)) { 1707 return; 1708 } 1709 if (count == 5) { 1710 _appendSymbol(appendTo, value, fSymbols->fStandaloneNarrowWeekdays, 1711 fSymbols->fStandaloneNarrowWeekdaysCount); 1712 capContextUsageType = DateFormatSymbols::kCapContextUsageDayNarrow; 1713 } else if (count == 4) { 1714 _appendSymbol(appendTo, value, fSymbols->fStandaloneWeekdays, 1715 fSymbols->fStandaloneWeekdaysCount); 1716 capContextUsageType = DateFormatSymbols::kCapContextUsageDayStandalone; 1717 } else if (count == 6) { 1718 _appendSymbol(appendTo, value, fSymbols->fStandaloneShorterWeekdays, 1719 fSymbols->fStandaloneShorterWeekdaysCount); 1720 capContextUsageType = DateFormatSymbols::kCapContextUsageDayStandalone; 1721 } else { // count == 3 1722 _appendSymbol(appendTo, value, fSymbols->fStandaloneShortWeekdays, 1723 fSymbols->fStandaloneShortWeekdaysCount); 1724 capContextUsageType = DateFormatSymbols::kCapContextUsageDayStandalone; 1725 } 1726 break; 1727 1728 // for "a" symbol, write out the whole AM/PM string 1729 case UDAT_AM_PM_FIELD: 1730 if (count == 4) { 1731 _appendSymbol(appendTo, value, fSymbols->fWideAmPms, 1732 fSymbols->fWideAmPmsCount); 1733 } else if (count == 5) { 1734 _appendSymbol(appendTo, value, fSymbols->fNarrowAmPms, 1735 fSymbols->fNarrowAmPmsCount); 1736 } else { 1737 _appendSymbol(appendTo, value, fSymbols->fAmPms, 1738 fSymbols->fAmPmsCount); 1739 } 1740 break; 1741 1742 // if we see pattern character for UDAT_TIME_SEPARATOR_FIELD (none currently defined), 1743 // write out the time separator string. Leave support in for future definition. 1744 case UDAT_TIME_SEPARATOR_FIELD: 1745 { 1746 UnicodeString separator; 1747 appendTo += fSymbols->getTimeSeparatorString(separator); 1748 } 1749 break; 1750 1751 // for "h" and "hh", write out the hour, adjusting noon and midnight to show up 1752 // as "12" 1753 case UDAT_HOUR1_FIELD: 1754 if (value == 0) 1755 zeroPaddingNumber(currentNumberFormat,appendTo, cal.getLeastMaximum(UCAL_HOUR) + 1, count, maxIntCount); 1756 else 1757 zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount); 1758 break; 1759 1760 case UDAT_TIMEZONE_FIELD: // 'z' 1761 case UDAT_TIMEZONE_RFC_FIELD: // 'Z' 1762 case UDAT_TIMEZONE_GENERIC_FIELD: // 'v' 1763 case UDAT_TIMEZONE_SPECIAL_FIELD: // 'V' 1764 case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD: // 'O' 1765 case UDAT_TIMEZONE_ISO_FIELD: // 'X' 1766 case UDAT_TIMEZONE_ISO_LOCAL_FIELD: // 'x' 1767 { 1768 char16_t zsbuf[ZONE_NAME_U16_MAX]; 1769 UnicodeString zoneString(zsbuf, 0, UPRV_LENGTHOF(zsbuf)); 1770 const TimeZone& tz = cal.getTimeZone(); 1771 UDate date = cal.getTime(status); 1772 const TimeZoneFormat *tzfmt = tzFormat(status); 1773 if (U_SUCCESS(status)) { 1774 switch (patternCharIndex) { 1775 case UDAT_TIMEZONE_FIELD: 1776 if (count < 4) { 1777 // "z", "zz", "zzz" 1778 tzfmt->format(UTZFMT_STYLE_SPECIFIC_SHORT, tz, date, zoneString); 1779 capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneShort; 1780 } else { 1781 // "zzzz" or longer 1782 tzfmt->format(UTZFMT_STYLE_SPECIFIC_LONG, tz, date, zoneString); 1783 capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneLong; 1784 } 1785 break; 1786 case UDAT_TIMEZONE_RFC_FIELD: 1787 if (count < 4) { 1788 // "Z" 1789 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL, tz, date, zoneString); 1790 } else if (count == 5) { 1791 // "ZZZZZ" 1792 tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_FULL, tz, date, zoneString); 1793 } else { 1794 // "ZZ", "ZZZ", "ZZZZ" 1795 tzfmt->format(UTZFMT_STYLE_LOCALIZED_GMT, tz, date, zoneString); 1796 } 1797 break; 1798 case UDAT_TIMEZONE_GENERIC_FIELD: 1799 if (count == 1) { 1800 // "v" 1801 tzfmt->format(UTZFMT_STYLE_GENERIC_SHORT, tz, date, zoneString); 1802 capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneShort; 1803 } else if (count == 4) { 1804 // "vvvv" 1805 tzfmt->format(UTZFMT_STYLE_GENERIC_LONG, tz, date, zoneString); 1806 capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneLong; 1807 } 1808 break; 1809 case UDAT_TIMEZONE_SPECIAL_FIELD: 1810 if (count == 1) { 1811 // "V" 1812 tzfmt->format(UTZFMT_STYLE_ZONE_ID_SHORT, tz, date, zoneString); 1813 } else if (count == 2) { 1814 // "VV" 1815 tzfmt->format(UTZFMT_STYLE_ZONE_ID, tz, date, zoneString); 1816 } else if (count == 3) { 1817 // "VVV" 1818 tzfmt->format(UTZFMT_STYLE_EXEMPLAR_LOCATION, tz, date, zoneString); 1819 } else if (count == 4) { 1820 // "VVVV" 1821 tzfmt->format(UTZFMT_STYLE_GENERIC_LOCATION, tz, date, zoneString); 1822 capContextUsageType = DateFormatSymbols::kCapContextUsageZoneLong; 1823 } 1824 break; 1825 case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD: 1826 if (count == 1) { 1827 // "O" 1828 tzfmt->format(UTZFMT_STYLE_LOCALIZED_GMT_SHORT, tz, date, zoneString); 1829 } else if (count == 4) { 1830 // "OOOO" 1831 tzfmt->format(UTZFMT_STYLE_LOCALIZED_GMT, tz, date, zoneString); 1832 } 1833 break; 1834 case UDAT_TIMEZONE_ISO_FIELD: 1835 if (count == 1) { 1836 // "X" 1837 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_SHORT, tz, date, zoneString); 1838 } else if (count == 2) { 1839 // "XX" 1840 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_FIXED, tz, date, zoneString); 1841 } else if (count == 3) { 1842 // "XXX" 1843 tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_FIXED, tz, date, zoneString); 1844 } else if (count == 4) { 1845 // "XXXX" 1846 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_FULL, tz, date, zoneString); 1847 } else if (count == 5) { 1848 // "XXXXX" 1849 tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_FULL, tz, date, zoneString); 1850 } 1851 break; 1852 case UDAT_TIMEZONE_ISO_LOCAL_FIELD: 1853 if (count == 1) { 1854 // "x" 1855 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_SHORT, tz, date, zoneString); 1856 } else if (count == 2) { 1857 // "xx" 1858 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_FIXED, tz, date, zoneString); 1859 } else if (count == 3) { 1860 // "xxx" 1861 tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FIXED, tz, date, zoneString); 1862 } else if (count == 4) { 1863 // "xxxx" 1864 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL, tz, date, zoneString); 1865 } else if (count == 5) { 1866 // "xxxxx" 1867 tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FULL, tz, date, zoneString); 1868 } 1869 break; 1870 default: 1871 UPRV_UNREACHABLE_EXIT; 1872 } 1873 } 1874 appendTo += zoneString; 1875 } 1876 break; 1877 1878 case UDAT_QUARTER_FIELD: 1879 if (count >= 5) 1880 _appendSymbol(appendTo, value/3, fSymbols->fNarrowQuarters, 1881 fSymbols->fNarrowQuartersCount); 1882 else if (count == 4) 1883 _appendSymbol(appendTo, value/3, fSymbols->fQuarters, 1884 fSymbols->fQuartersCount); 1885 else if (count == 3) 1886 _appendSymbol(appendTo, value/3, fSymbols->fShortQuarters, 1887 fSymbols->fShortQuartersCount); 1888 else 1889 zeroPaddingNumber(currentNumberFormat,appendTo, (value/3) + 1, count, maxIntCount); 1890 break; 1891 1892 case UDAT_STANDALONE_QUARTER_FIELD: 1893 if (count >= 5) 1894 _appendSymbol(appendTo, value/3, fSymbols->fStandaloneNarrowQuarters, 1895 fSymbols->fStandaloneNarrowQuartersCount); 1896 else if (count == 4) 1897 _appendSymbol(appendTo, value/3, fSymbols->fStandaloneQuarters, 1898 fSymbols->fStandaloneQuartersCount); 1899 else if (count == 3) 1900 _appendSymbol(appendTo, value/3, fSymbols->fStandaloneShortQuarters, 1901 fSymbols->fStandaloneShortQuartersCount); 1902 else 1903 zeroPaddingNumber(currentNumberFormat,appendTo, (value/3) + 1, count, maxIntCount); 1904 break; 1905 1906 case UDAT_AM_PM_MIDNIGHT_NOON_FIELD: 1907 { 1908 const UnicodeString *toAppend = nullptr; 1909 int32_t hour = cal.get(UCAL_HOUR_OF_DAY, status); 1910 1911 // Note: "midnight" can be ambiguous as to whether it refers to beginning of day or end of day. 1912 // For ICU 57 output of "midnight" is temporarily suppressed. 1913 1914 // For "midnight" and "noon": 1915 // Time, as displayed, must be exactly noon or midnight. 1916 // This means minutes and seconds, if present, must be zero. 1917 if ((/*hour == 0 ||*/ hour == 12) && 1918 (!fHasMinute || cal.get(UCAL_MINUTE, status) == 0) && 1919 (!fHasSecond || cal.get(UCAL_SECOND, status) == 0)) { 1920 // Stealing am/pm value to use as our array index. 1921 // It works out: am/midnight are both 0, pm/noon are both 1, 1922 // 12 am is 12 midnight, and 12 pm is 12 noon. 1923 int32_t val = cal.get(UCAL_AM_PM, status); 1924 1925 if (count <= 3) { 1926 toAppend = &fSymbols->fAbbreviatedDayPeriods[val]; 1927 } else if (count == 4 || count > 5) { 1928 toAppend = &fSymbols->fWideDayPeriods[val]; 1929 } else { // count == 5 1930 toAppend = &fSymbols->fNarrowDayPeriods[val]; 1931 } 1932 } 1933 1934 // toAppend is nullptr if time isn't exactly midnight or noon (as displayed). 1935 // toAppend is bogus if time is midnight or noon, but no localized string exists. 1936 // In either case, fall back to am/pm. 1937 if (toAppend == nullptr || toAppend->isBogus()) { 1938 // Reformat with identical arguments except ch, now changed to 'a'. 1939 // We are passing a different fieldToOutput because we want to add 1940 // 'b' to field position. This makes this fallback stable when 1941 // there is a data change on locales. 1942 subFormat(appendTo, u'a', count, capitalizationContext, fieldNum, u'b', handler, cal, status); 1943 return; 1944 } else { 1945 appendTo += *toAppend; 1946 } 1947 1948 break; 1949 } 1950 1951 case UDAT_FLEXIBLE_DAY_PERIOD_FIELD: 1952 { 1953 // TODO: Maybe fetch the DayperiodRules during initialization (instead of at the first 1954 // loading of an instance) if a relevant pattern character (b or B) is used. 1955 const DayPeriodRules *ruleSet = DayPeriodRules::getInstance(this->getSmpFmtLocale(), status); 1956 if (U_FAILURE(status)) { 1957 // Data doesn't conform to spec, therefore loading failed. 1958 break; 1959 } 1960 if (ruleSet == nullptr) { 1961 // Data doesn't exist for the locale we're looking for. 1962 // Falling back to am/pm. 1963 // We are passing a different fieldToOutput because we want to add 1964 // 'B' to field position. This makes this fallback stable when 1965 // there is a data change on locales. 1966 subFormat(appendTo, u'a', count, capitalizationContext, fieldNum, u'B', handler, cal, status); 1967 return; 1968 } 1969 1970 // Get current display time. 1971 int32_t hour = cal.get(UCAL_HOUR_OF_DAY, status); 1972 int32_t minute = 0; 1973 if (fHasMinute) { 1974 minute = cal.get(UCAL_MINUTE, status); 1975 } 1976 int32_t second = 0; 1977 if (fHasSecond) { 1978 second = cal.get(UCAL_SECOND, status); 1979 } 1980 1981 // Determine day period. 1982 DayPeriodRules::DayPeriod periodType; 1983 if (hour == 0 && minute == 0 && second == 0 && ruleSet->hasMidnight()) { 1984 periodType = DayPeriodRules::DAYPERIOD_MIDNIGHT; 1985 } else if (hour == 12 && minute == 0 && second == 0 && ruleSet->hasNoon()) { 1986 periodType = DayPeriodRules::DAYPERIOD_NOON; 1987 } else { 1988 periodType = ruleSet->getDayPeriodForHour(hour); 1989 } 1990 1991 // Rule set exists, therefore periodType can't be UNKNOWN. 1992 // Get localized string. 1993 U_ASSERT(periodType != DayPeriodRules::DAYPERIOD_UNKNOWN); 1994 UnicodeString *toAppend = nullptr; 1995 int32_t index; 1996 1997 // Note: "midnight" can be ambiguous as to whether it refers to beginning of day or end of day. 1998 // For ICU 57 output of "midnight" is temporarily suppressed. 1999 2000 if (periodType != DayPeriodRules::DAYPERIOD_AM && 2001 periodType != DayPeriodRules::DAYPERIOD_PM && 2002 periodType != DayPeriodRules::DAYPERIOD_MIDNIGHT) { 2003 index = static_cast<int32_t>(periodType); 2004 if (count <= 3) { 2005 toAppend = &fSymbols->fAbbreviatedDayPeriods[index]; // i.e. short 2006 } else if (count == 4 || count > 5) { 2007 toAppend = &fSymbols->fWideDayPeriods[index]; 2008 } else { // count == 5 2009 toAppend = &fSymbols->fNarrowDayPeriods[index]; 2010 } 2011 } 2012 2013 // Fallback schedule: 2014 // Midnight/Noon -> General Periods -> AM/PM. 2015 2016 // Midnight/Noon -> General Periods. 2017 if ((toAppend == nullptr || toAppend->isBogus()) && 2018 (periodType == DayPeriodRules::DAYPERIOD_MIDNIGHT || 2019 periodType == DayPeriodRules::DAYPERIOD_NOON)) { 2020 periodType = ruleSet->getDayPeriodForHour(hour); 2021 index = static_cast<int32_t>(periodType); 2022 2023 if (count <= 3) { 2024 toAppend = &fSymbols->fAbbreviatedDayPeriods[index]; // i.e. short 2025 } else if (count == 4 || count > 5) { 2026 toAppend = &fSymbols->fWideDayPeriods[index]; 2027 } else { // count == 5 2028 toAppend = &fSymbols->fNarrowDayPeriods[index]; 2029 } 2030 } 2031 2032 // General Periods -> AM/PM. 2033 if (periodType == DayPeriodRules::DAYPERIOD_AM || 2034 periodType == DayPeriodRules::DAYPERIOD_PM || 2035 toAppend->isBogus()) { 2036 // We are passing a different fieldToOutput because we want to add 2037 // 'B' to field position iterator. This makes this fallback stable when 2038 // there is a data change on locales. 2039 subFormat(appendTo, u'a', count, capitalizationContext, fieldNum, u'B', handler, cal, status); 2040 return; 2041 } 2042 else { 2043 appendTo += *toAppend; 2044 } 2045 2046 break; 2047 } 2048 2049 // all of the other pattern symbols can be formatted as simple numbers with 2050 // appropriate zero padding 2051 default: 2052 zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount); 2053 break; 2054 } 2055 #if !UCONFIG_NO_BREAK_ITERATION 2056 // if first field, check to see whether we need to and are able to titlecase it 2057 if (fieldNum == 0 && fCapitalizationBrkIter != nullptr && appendTo.length() > beginOffset && 2058 u_islower(appendTo.char32At(beginOffset))) { 2059 UBool titlecase = false; 2060 switch (capitalizationContext) { 2061 case UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE: 2062 titlecase = true; 2063 break; 2064 case UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU: 2065 titlecase = fSymbols->fCapitalization[capContextUsageType][0]; 2066 break; 2067 case UDISPCTX_CAPITALIZATION_FOR_STANDALONE: 2068 titlecase = fSymbols->fCapitalization[capContextUsageType][1]; 2069 break; 2070 default: 2071 // titlecase = false; 2072 break; 2073 } 2074 if (titlecase) { 2075 BreakIterator* const mutableCapitalizationBrkIter = fCapitalizationBrkIter->clone(); 2076 UnicodeString firstField(appendTo, beginOffset); 2077 firstField.toTitle(mutableCapitalizationBrkIter, fLocale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT); 2078 appendTo.replaceBetween(beginOffset, appendTo.length(), firstField); 2079 delete mutableCapitalizationBrkIter; 2080 } 2081 } 2082 #endif 2083 2084 handler.addAttribute(DateFormatSymbols::getPatternCharIndex(fieldToOutput), beginOffset, appendTo.length()); 2085 } 2086 2087 //---------------------------------------------------------------------- 2088 2089 void SimpleDateFormat::adoptNumberFormat(NumberFormat *formatToAdopt) { 2090 // Null out the fast formatter, it references fNumberFormat which we're 2091 // about to invalidate 2092 delete fSimpleNumberFormatter; 2093 fSimpleNumberFormatter = nullptr; 2094 2095 fixNumberFormatForDates(*formatToAdopt); 2096 delete fNumberFormat; 2097 fNumberFormat = formatToAdopt; 2098 2099 // We successfully set the default number format. Now delete the overrides 2100 // (can't fail). 2101 if (fSharedNumberFormatters) { 2102 freeSharedNumberFormatters(fSharedNumberFormatters); 2103 fSharedNumberFormatters = nullptr; 2104 } 2105 2106 // Recompute fSimpleNumberFormatter if necessary 2107 UErrorCode localStatus = U_ZERO_ERROR; 2108 initSimpleNumberFormatter(localStatus); 2109 } 2110 2111 void SimpleDateFormat::adoptNumberFormat(const UnicodeString& fields, NumberFormat *formatToAdopt, UErrorCode &status){ 2112 fixNumberFormatForDates(*formatToAdopt); 2113 LocalPointer<NumberFormat> fmt(formatToAdopt); 2114 if (U_FAILURE(status)) { 2115 return; 2116 } 2117 2118 // We must ensure fSharedNumberFormatters is allocated. 2119 if (fSharedNumberFormatters == nullptr) { 2120 fSharedNumberFormatters = allocSharedNumberFormatters(); 2121 if (fSharedNumberFormatters == nullptr) { 2122 status = U_MEMORY_ALLOCATION_ERROR; 2123 return; 2124 } 2125 } 2126 const SharedNumberFormat *newFormat = createSharedNumberFormat(fmt.orphan()); 2127 if (newFormat == nullptr) { 2128 status = U_MEMORY_ALLOCATION_ERROR; 2129 return; 2130 } 2131 for (int i=0; i<fields.length(); i++) { 2132 char16_t field = fields.charAt(i); 2133 // if the pattern character is unrecognized, signal an error and bail out 2134 UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(field); 2135 if (patternCharIndex == UDAT_FIELD_COUNT) { 2136 status = U_INVALID_FORMAT_ERROR; 2137 newFormat->deleteIfZeroRefCount(); 2138 return; 2139 } 2140 2141 // Set the number formatter in the table 2142 SharedObject::copyPtr( 2143 newFormat, fSharedNumberFormatters[patternCharIndex]); 2144 } 2145 newFormat->deleteIfZeroRefCount(); 2146 } 2147 2148 const NumberFormat * 2149 SimpleDateFormat::getNumberFormatForField(char16_t field) const { 2150 UDateFormatField index = DateFormatSymbols::getPatternCharIndex(field); 2151 if (index == UDAT_FIELD_COUNT) { 2152 return nullptr; 2153 } 2154 return getNumberFormatByIndex(index); 2155 } 2156 2157 //---------------------------------------------------------------------- 2158 void 2159 SimpleDateFormat::zeroPaddingNumber( 2160 const NumberFormat *currentNumberFormat, 2161 UnicodeString &appendTo, 2162 int32_t value, int32_t minDigits, int32_t maxDigits) const 2163 { 2164 2165 if (currentNumberFormat == fNumberFormat && fSimpleNumberFormatter) { 2166 // Can use fast path 2167 // We create UFormattedNumberData ourselves to avoid a heap allocation 2168 // and corresponding free. Set the pointer to null afterwards to prevent 2169 // the implementation from attempting to free it. 2170 UErrorCode localStatus = U_ZERO_ERROR; 2171 number::impl::UFormattedNumberData data; 2172 data.quantity.setToLong(value); 2173 number::SimpleNumber number(&data, localStatus); 2174 number.setMinimumIntegerDigits(minDigits, localStatus); 2175 number.setMaximumIntegerDigits(maxDigits, localStatus); 2176 2177 number::FormattedNumber result = fSimpleNumberFormatter->format(std::move(number), localStatus); 2178 if (U_FAILURE(localStatus)) { 2179 result.fData = nullptr; 2180 return; 2181 } 2182 UnicodeStringAppendable appendable(appendTo); 2183 result.appendTo(appendable, localStatus); 2184 result.fData = nullptr; 2185 return; 2186 } 2187 2188 // Check for RBNF (no clone necessary) 2189 const auto* rbnf = dynamic_cast<const RuleBasedNumberFormat*>(currentNumberFormat); 2190 if (rbnf != nullptr) { 2191 FieldPosition pos(FieldPosition::DONT_CARE); 2192 rbnf->format(value, appendTo, pos); // 3rd arg is there to speed up processing 2193 return; 2194 } 2195 2196 // Fall back to slow path (clone and mutate the NumberFormat) 2197 if (currentNumberFormat != nullptr) { 2198 FieldPosition pos(FieldPosition::DONT_CARE); 2199 LocalPointer<NumberFormat> nf(currentNumberFormat->clone()); 2200 nf->setMinimumIntegerDigits(minDigits); 2201 nf->setMaximumIntegerDigits(maxDigits); 2202 nf->format(value, appendTo, pos); // 3rd arg is there to speed up processing 2203 } 2204 } 2205 2206 //---------------------------------------------------------------------- 2207 2208 /** 2209 * Return true if the given format character, occurring count 2210 * times, represents a numeric field. 2211 */ 2212 UBool SimpleDateFormat::isNumeric(char16_t formatChar, int32_t count) { 2213 return DateFormatSymbols::isNumericPatternChar(formatChar, count); 2214 } 2215 2216 UBool 2217 SimpleDateFormat::isAtNumericField(const UnicodeString &pattern, int32_t patternOffset) { 2218 if (patternOffset >= pattern.length()) { 2219 // not at any field 2220 return false; 2221 } 2222 char16_t ch = pattern.charAt(patternOffset); 2223 UDateFormatField f = DateFormatSymbols::getPatternCharIndex(ch); 2224 if (f == UDAT_FIELD_COUNT) { 2225 // not at any field 2226 return false; 2227 } 2228 int32_t i = patternOffset; 2229 while (pattern.charAt(++i) == ch) {} 2230 return DateFormatSymbols::isNumericField(f, i - patternOffset); 2231 } 2232 2233 UBool 2234 SimpleDateFormat::isAfterNonNumericField(const UnicodeString &pattern, int32_t patternOffset) { 2235 if (patternOffset <= 0) { 2236 // not after any field 2237 return false; 2238 } 2239 char16_t ch = pattern.charAt(--patternOffset); 2240 UDateFormatField f = DateFormatSymbols::getPatternCharIndex(ch); 2241 if (f == UDAT_FIELD_COUNT) { 2242 // not after any field 2243 return false; 2244 } 2245 int32_t i = patternOffset; 2246 while (pattern.charAt(--i) == ch) {} 2247 return !DateFormatSymbols::isNumericField(f, patternOffset - i); 2248 } 2249 2250 void 2251 SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& parsePos) const 2252 { 2253 UErrorCode status = U_ZERO_ERROR; 2254 int32_t pos = parsePos.getIndex(); 2255 if(parsePos.getIndex() < 0) { 2256 parsePos.setErrorIndex(0); 2257 return; 2258 } 2259 int32_t start = pos; 2260 2261 // Hold the day period until everything else is parsed, because we need 2262 // the hour to interpret time correctly. 2263 int32_t dayPeriodInt = -1; 2264 2265 UBool ambiguousYear[] = { false }; 2266 int32_t saveHebrewMonth = -1; 2267 int32_t count = 0; 2268 UTimeZoneFormatTimeType tzTimeType = UTZFMT_TIME_TYPE_UNKNOWN; 2269 2270 // For parsing abutting numeric fields. 'abutPat' is the 2271 // offset into 'pattern' of the first of 2 or more abutting 2272 // numeric fields. 'abutStart' is the offset into 'text' 2273 // where parsing the fields begins. 'abutPass' starts off as 0 2274 // and increments each time we try to parse the fields. 2275 int32_t abutPat = -1; // If >=0, we are in a run of abutting numeric fields 2276 int32_t abutStart = 0; 2277 int32_t abutPass = 0; 2278 UBool inQuote = false; 2279 2280 MessageFormat * numericLeapMonthFormatter = nullptr; 2281 2282 Calendar* calClone = nullptr; 2283 Calendar *workCal = &cal; 2284 if (&cal != fCalendar && typeid(cal) != typeid(*fCalendar)) { 2285 // Different calendar type 2286 // We use the time/zone from the input calendar, but 2287 // do not use the input calendar for field calculation. 2288 calClone = fCalendar->clone(); 2289 if (calClone != nullptr) { 2290 calClone->setTime(cal.getTime(status),status); 2291 if (U_FAILURE(status)) { 2292 goto ExitParse; 2293 } 2294 calClone->setTimeZone(cal.getTimeZone()); 2295 workCal = calClone; 2296 } else { 2297 status = U_MEMORY_ALLOCATION_ERROR; 2298 goto ExitParse; 2299 } 2300 } 2301 2302 if (fSymbols->fLeapMonthPatterns != nullptr && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount) { 2303 numericLeapMonthFormatter = new MessageFormat(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternNumeric], fLocale, status); 2304 if (numericLeapMonthFormatter == nullptr) { 2305 status = U_MEMORY_ALLOCATION_ERROR; 2306 goto ExitParse; 2307 } else if (U_FAILURE(status)) { 2308 goto ExitParse; // this will delete numericLeapMonthFormatter 2309 } 2310 } 2311 2312 for (int32_t i=0; i<fPattern.length(); ++i) { 2313 char16_t ch = fPattern.charAt(i); 2314 2315 // Handle alphabetic field characters. 2316 if (!inQuote && isSyntaxChar(ch)) { 2317 int32_t fieldPat = i; 2318 2319 // Count the length of this field specifier 2320 count = 1; 2321 while ((i+1)<fPattern.length() && 2322 fPattern.charAt(i+1) == ch) { 2323 ++count; 2324 ++i; 2325 } 2326 2327 if (isNumeric(ch, count)) { 2328 if (abutPat < 0) { 2329 // Determine if there is an abutting numeric field. 2330 // Record the start of a set of abutting numeric fields. 2331 if (isAtNumericField(fPattern, i + 1)) { 2332 abutPat = fieldPat; 2333 abutStart = pos; 2334 abutPass = 0; 2335 } 2336 } 2337 } else { 2338 abutPat = -1; // End of any abutting fields 2339 } 2340 2341 // Handle fields within a run of abutting numeric fields. Take 2342 // the pattern "HHmmss" as an example. We will try to parse 2343 // 2/2/2 characters of the input text, then if that fails, 2344 // 1/2/2. We only adjust the width of the leftmost field; the 2345 // others remain fixed. This allows "123456" => 12:34:56, but 2346 // "12345" => 1:23:45. Likewise, for the pattern "yyyyMMdd" we 2347 // try 4/2/2, 3/2/2, 2/2/2, and finally 1/2/2. 2348 if (abutPat >= 0) { 2349 // If we are at the start of a run of abutting fields, then 2350 // shorten this field in each pass. If we can't shorten 2351 // this field any more, then the parse of this set of 2352 // abutting numeric fields has failed. 2353 if (fieldPat == abutPat) { 2354 count -= abutPass++; 2355 if (count == 0) { 2356 status = U_PARSE_ERROR; 2357 goto ExitParse; 2358 } 2359 } 2360 2361 pos = subParse(text, pos, ch, count, 2362 true, false, ambiguousYear, saveHebrewMonth, *workCal, i, numericLeapMonthFormatter, &tzTimeType); 2363 2364 // If the parse fails anywhere in the run, back up to the 2365 // start of the run and retry. 2366 if (pos < 0) { 2367 i = abutPat - 1; 2368 pos = abutStart; 2369 continue; 2370 } 2371 } 2372 2373 // Handle non-numeric fields and non-abutting numeric 2374 // fields. 2375 else if (ch != 0x6C) { // pattern char 'l' (SMALL LETTER L) just gets ignored 2376 int32_t s = subParse(text, pos, ch, count, 2377 false, true, ambiguousYear, saveHebrewMonth, *workCal, i, numericLeapMonthFormatter, &tzTimeType, &dayPeriodInt); 2378 2379 if (s == -pos-1) { 2380 // era not present, in special cases allow this to continue 2381 // from the position where the era was expected 2382 s = pos; 2383 2384 if (i+1 < fPattern.length()) { 2385 // move to next pattern character 2386 char16_t c = fPattern.charAt(i+1); 2387 2388 // check for whitespace 2389 if (PatternProps::isWhiteSpace(c)) { 2390 i++; 2391 // Advance over run in pattern 2392 while ((i+1)<fPattern.length() && 2393 PatternProps::isWhiteSpace(fPattern.charAt(i+1))) { 2394 ++i; 2395 } 2396 } 2397 } 2398 } 2399 else if (s <= 0) { 2400 status = U_PARSE_ERROR; 2401 goto ExitParse; 2402 } 2403 pos = s; 2404 } 2405 } 2406 2407 // Handle literal pattern characters. These are any 2408 // quoted characters and non-alphabetic unquoted 2409 // characters. 2410 else { 2411 2412 abutPat = -1; // End of any abutting fields 2413 2414 if (! matchLiterals(fPattern, i, text, pos, getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status), getBooleanAttribute(UDAT_PARSE_PARTIAL_LITERAL_MATCH, status), isLenient())) { 2415 status = U_PARSE_ERROR; 2416 goto ExitParse; 2417 } 2418 } 2419 } 2420 2421 // Special hack for trailing "." after non-numeric field. 2422 if (text.charAt(pos) == 0x2e && getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status)) { 2423 // only do if the last field is not numeric 2424 if (isAfterNonNumericField(fPattern, fPattern.length())) { 2425 pos++; // skip the extra "." 2426 } 2427 } 2428 2429 // If dayPeriod is set, use it in conjunction with hour-of-day to determine am/pm. 2430 if (dayPeriodInt >= 0) { 2431 DayPeriodRules::DayPeriod dayPeriod = static_cast<DayPeriodRules::DayPeriod>(dayPeriodInt); 2432 const DayPeriodRules *ruleSet = DayPeriodRules::getInstance(this->getSmpFmtLocale(), status); 2433 2434 if (!cal.isSet(UCAL_HOUR) && !cal.isSet(UCAL_HOUR_OF_DAY)) { 2435 // If hour is not set, set time to the midpoint of current day period, overwriting 2436 // minutes if it's set. 2437 double midPoint = ruleSet->getMidPointForDayPeriod(dayPeriod, status); 2438 2439 // If we can't get midPoint we do nothing. 2440 if (U_SUCCESS(status)) { 2441 // Truncate midPoint toward zero to get the hour. 2442 // Any leftover means it was a half-hour. 2443 int32_t midPointHour = static_cast<int32_t>(midPoint); 2444 int32_t midPointMinute = (midPoint - midPointHour) > 0 ? 30 : 0; 2445 2446 // No need to set am/pm because hour-of-day is set last therefore takes precedence. 2447 cal.set(UCAL_HOUR_OF_DAY, midPointHour); 2448 cal.set(UCAL_MINUTE, midPointMinute); 2449 } 2450 } else { 2451 int hourOfDay; 2452 2453 if (cal.isSet(UCAL_HOUR_OF_DAY)) { // Hour is parsed in 24-hour format. 2454 hourOfDay = cal.get(UCAL_HOUR_OF_DAY, status); 2455 } else { // Hour is parsed in 12-hour format. 2456 hourOfDay = cal.get(UCAL_HOUR, status); 2457 // cal.get() turns 12 to 0 for 12-hour time; change 0 to 12 2458 // so 0 unambiguously means a 24-hour time from above. 2459 if (hourOfDay == 0) { hourOfDay = 12; } 2460 } 2461 U_ASSERT(0 <= hourOfDay && hourOfDay <= 23); 2462 2463 2464 // If hour-of-day is 0 or 13 thru 23 then input time in unambiguously in 24-hour format. 2465 if (hourOfDay == 0 || (13 <= hourOfDay && hourOfDay <= 23)) { 2466 // Make hour-of-day take precedence over (hour + am/pm) by setting it again. 2467 cal.set(UCAL_HOUR_OF_DAY, hourOfDay); 2468 } else { 2469 // We have a 12-hour time and need to choose between am and pm. 2470 // Behave as if dayPeriod spanned 6 hours each way from its center point. 2471 // This will parse correctly for consistent time + period (e.g. 10 at night) as 2472 // well as provide a reasonable recovery for inconsistent time + period (e.g. 2473 // 9 in the afternoon). 2474 2475 // Assume current time is in the AM. 2476 // - Change 12 back to 0 for easier handling of 12am. 2477 // - Append minutes as fractional hours because e.g. 8:15 and 8:45 could be parsed 2478 // into different half-days if center of dayPeriod is at 14:30. 2479 // - cal.get(MINUTE) will return 0 if MINUTE is unset, which works. 2480 if (hourOfDay == 12) { hourOfDay = 0; } 2481 double currentHour = hourOfDay + (cal.get(UCAL_MINUTE, status)) / 60.0; 2482 double midPointHour = ruleSet->getMidPointForDayPeriod(dayPeriod, status); 2483 2484 if (U_SUCCESS(status)) { 2485 double hoursAheadMidPoint = currentHour - midPointHour; 2486 2487 // Assume current time is in the AM. 2488 if (-6 <= hoursAheadMidPoint && hoursAheadMidPoint < 6) { 2489 // Assumption holds; set time as such. 2490 cal.set(UCAL_AM_PM, 0); 2491 } else { 2492 cal.set(UCAL_AM_PM, 1); 2493 } 2494 } 2495 } 2496 } 2497 } 2498 2499 // At this point the fields of Calendar have been set. Calendar 2500 // will fill in default values for missing fields when the time 2501 // is computed. 2502 2503 parsePos.setIndex(pos); 2504 2505 // This part is a problem: When we call parsedDate.after, we compute the time. 2506 // Take the date April 3 2004 at 2:30 am. When this is first set up, the year 2507 // will be wrong if we're parsing a 2-digit year pattern. It will be 1904. 2508 // April 3 1904 is a Sunday (unlike 2004) so it is the DST onset day. 2:30 am 2509 // is therefore an "impossible" time, since the time goes from 1:59 to 3:00 am 2510 // on that day. It is therefore parsed out to fields as 3:30 am. Then we 2511 // add 100 years, and get April 3 2004 at 3:30 am. Note that April 3 2004 is 2512 // a Saturday, so it can have a 2:30 am -- and it should. [LIU] 2513 /* 2514 UDate parsedDate = calendar.getTime(); 2515 if( ambiguousYear[0] && !parsedDate.after(fDefaultCenturyStart) ) { 2516 calendar.add(Calendar.YEAR, 100); 2517 parsedDate = calendar.getTime(); 2518 } 2519 */ 2520 // Because of the above condition, save off the fields in case we need to readjust. 2521 // The procedure we use here is not particularly efficient, but there is no other 2522 // way to do this given the API restrictions present in Calendar. We minimize 2523 // inefficiency by only performing this computation when it might apply, that is, 2524 // when the two-digit year is equal to the start year, and thus might fall at the 2525 // front or the back of the default century. This only works because we adjust 2526 // the year correctly to start with in other cases -- see subParse(). 2527 if (ambiguousYear[0] || tzTimeType != UTZFMT_TIME_TYPE_UNKNOWN) // If this is true then the two-digit year == the default start year 2528 { 2529 // We need a copy of the fields, and we need to avoid triggering a call to 2530 // complete(), which will recalculate the fields. Since we can't access 2531 // the fields[] array in Calendar, we clone the entire object. This will 2532 // stop working if Calendar.clone() is ever rewritten to call complete(). 2533 Calendar *copy; 2534 if (ambiguousYear[0]) { 2535 copy = cal.clone(); 2536 // Check for failed cloning. 2537 if (copy == nullptr) { 2538 status = U_MEMORY_ALLOCATION_ERROR; 2539 goto ExitParse; 2540 } 2541 UDate parsedDate = copy->getTime(status); 2542 // {sfb} check internalGetDefaultCenturyStart 2543 if (fHaveDefaultCentury && (parsedDate < fDefaultCenturyStart)) { 2544 // We can't use add here because that does a complete() first. 2545 cal.set(UCAL_YEAR, fDefaultCenturyStartYear + 100); 2546 } 2547 delete copy; 2548 } 2549 2550 if (tzTimeType != UTZFMT_TIME_TYPE_UNKNOWN) { 2551 copy = cal.clone(); 2552 // Check for failed cloning. 2553 if (copy == nullptr) { 2554 status = U_MEMORY_ALLOCATION_ERROR; 2555 goto ExitParse; 2556 } 2557 const TimeZone & tz = cal.getTimeZone(); 2558 BasicTimeZone *btz = nullptr; 2559 2560 if (dynamic_cast<const OlsonTimeZone *>(&tz) != nullptr 2561 || dynamic_cast<const SimpleTimeZone *>(&tz) != nullptr 2562 || dynamic_cast<const RuleBasedTimeZone *>(&tz) != nullptr 2563 || dynamic_cast<const VTimeZone *>(&tz) != nullptr) { 2564 btz = (BasicTimeZone*)&tz; 2565 } 2566 2567 // Get local millis 2568 copy->set(UCAL_ZONE_OFFSET, 0); 2569 copy->set(UCAL_DST_OFFSET, 0); 2570 UDate localMillis = copy->getTime(status); 2571 2572 // Make sure parsed time zone type (Standard or Daylight) 2573 // matches the rule used by the parsed time zone. 2574 int32_t raw, dst; 2575 if (btz != nullptr) { 2576 if (tzTimeType == UTZFMT_TIME_TYPE_STANDARD) { 2577 btz->getOffsetFromLocal(localMillis, 2578 UCAL_TZ_LOCAL_STANDARD_FORMER, UCAL_TZ_LOCAL_STANDARD_LATTER, raw, dst, status); 2579 } else { 2580 btz->getOffsetFromLocal(localMillis, 2581 UCAL_TZ_LOCAL_DAYLIGHT_FORMER, UCAL_TZ_LOCAL_DAYLIGHT_LATTER, raw, dst, status); 2582 } 2583 } else { 2584 // No good way to resolve ambiguous time at transition, 2585 // but following code work in most case. 2586 tz.getOffset(localMillis, true, raw, dst, status); 2587 } 2588 2589 // Now, compare the results with parsed type, either standard or daylight saving time 2590 int32_t resolvedSavings = dst; 2591 if (tzTimeType == UTZFMT_TIME_TYPE_STANDARD) { 2592 if (dst != 0) { 2593 // Override DST_OFFSET = 0 in the result calendar 2594 resolvedSavings = 0; 2595 } 2596 } else { // tztype == TZTYPE_DST 2597 if (dst == 0) { 2598 if (btz != nullptr) { 2599 // This implementation resolves daylight saving time offset 2600 // closest rule after the given time. 2601 UDate baseTime = localMillis + raw; 2602 UDate time = baseTime; 2603 UDate limit = baseTime + MAX_DAYLIGHT_DETECTION_RANGE; 2604 TimeZoneTransition trs; 2605 UBool trsAvail; 2606 2607 // Search for DST rule after the given time 2608 while (time < limit) { 2609 trsAvail = btz->getNextTransition(time, false, trs); 2610 if (!trsAvail) { 2611 break; 2612 } 2613 resolvedSavings = trs.getTo()->getDSTSavings(); 2614 if (resolvedSavings != 0) { 2615 break; 2616 } 2617 time = trs.getTime(); 2618 } 2619 2620 if (resolvedSavings == 0) { 2621 // If no DST rule after the given time was found, search for 2622 // DST rule before. 2623 time = baseTime; 2624 limit = baseTime - MAX_DAYLIGHT_DETECTION_RANGE; 2625 while (time > limit) { 2626 trsAvail = btz->getPreviousTransition(time, true, trs); 2627 if (!trsAvail) { 2628 break; 2629 } 2630 resolvedSavings = trs.getFrom()->getDSTSavings(); 2631 if (resolvedSavings != 0) { 2632 break; 2633 } 2634 time = trs.getTime() - 1; 2635 } 2636 2637 if (resolvedSavings == 0) { 2638 resolvedSavings = btz->getDSTSavings(); 2639 } 2640 } 2641 } else { 2642 resolvedSavings = tz.getDSTSavings(); 2643 } 2644 if (resolvedSavings == 0) { 2645 // final fallback 2646 resolvedSavings = U_MILLIS_PER_HOUR; 2647 } 2648 } 2649 } 2650 cal.set(UCAL_ZONE_OFFSET, raw); 2651 cal.set(UCAL_DST_OFFSET, resolvedSavings); 2652 delete copy; 2653 } 2654 } 2655 ExitParse: 2656 // Set the parsed result if local calendar is used 2657 // instead of the input calendar 2658 if (U_SUCCESS(status) && workCal != &cal) { 2659 cal.setTimeZone(workCal->getTimeZone()); 2660 cal.setTime(workCal->getTime(status), status); 2661 } 2662 2663 delete numericLeapMonthFormatter; 2664 delete calClone; 2665 2666 // If any Calendar calls failed, we pretend that we 2667 // couldn't parse the string, when in reality this isn't quite accurate-- 2668 // we did parse it; the Calendar calls just failed. 2669 if (U_FAILURE(status)) { 2670 parsePos.setErrorIndex(pos); 2671 parsePos.setIndex(start); 2672 } 2673 } 2674 2675 //---------------------------------------------------------------------- 2676 2677 static int32_t 2678 matchStringWithOptionalDot(const UnicodeString &text, 2679 int32_t index, 2680 const UnicodeString &data); 2681 2682 int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text, 2683 int32_t start, 2684 UCalendarDateFields field, 2685 const UnicodeString* data, 2686 int32_t dataCount, 2687 Calendar& cal) const 2688 { 2689 int32_t i = 0; 2690 int32_t count = dataCount; 2691 2692 // There may be multiple strings in the data[] array which begin with 2693 // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech). 2694 // We keep track of the longest match, and return that. Note that this 2695 // unfortunately requires us to test all array elements. 2696 int32_t bestMatchLength = 0, bestMatch = -1; 2697 UnicodeString bestMatchName; 2698 2699 for (; i < count; ++i) { 2700 int32_t matchLength = 0; 2701 if ((matchLength = matchStringWithOptionalDot(text, start, data[i])) > bestMatchLength) { 2702 bestMatchLength = matchLength; 2703 bestMatch = i; 2704 } 2705 } 2706 2707 if (bestMatch >= 0) { 2708 cal.set(field, bestMatch * 3); 2709 return start + bestMatchLength; 2710 } 2711 2712 return -start; 2713 } 2714 2715 int32_t SimpleDateFormat::matchDayPeriodStrings(const UnicodeString& text, int32_t start, 2716 const UnicodeString* data, int32_t dataCount, 2717 int32_t &dayPeriod) const 2718 { 2719 2720 int32_t bestMatchLength = 0, bestMatch = -1; 2721 2722 for (int32_t i = 0; i < dataCount; ++i) { 2723 int32_t matchLength = 0; 2724 if ((matchLength = matchStringWithOptionalDot(text, start, data[i])) > bestMatchLength) { 2725 bestMatchLength = matchLength; 2726 bestMatch = i; 2727 } 2728 } 2729 2730 if (bestMatch >= 0) { 2731 dayPeriod = bestMatch; 2732 return start + bestMatchLength; 2733 } 2734 2735 return -start; 2736 } 2737 2738 //---------------------------------------------------------------------- 2739 UBool SimpleDateFormat::matchLiterals(const UnicodeString &pattern, 2740 int32_t &patternOffset, 2741 const UnicodeString &text, 2742 int32_t &textOffset, 2743 UBool whitespaceLenient, 2744 UBool partialMatchLenient, 2745 UBool oldLeniency) 2746 { 2747 UBool inQuote = false; 2748 UnicodeString literal; 2749 int32_t i = patternOffset; 2750 2751 // scan pattern looking for contiguous literal characters 2752 for ( ; i < pattern.length(); i += 1) { 2753 char16_t ch = pattern.charAt(i); 2754 2755 if (!inQuote && isSyntaxChar(ch)) { 2756 break; 2757 } 2758 2759 if (ch == QUOTE) { 2760 // Match a quote literal ('') inside OR outside of quotes 2761 if ((i + 1) < pattern.length() && pattern.charAt(i + 1) == QUOTE) { 2762 i += 1; 2763 } else { 2764 inQuote = !inQuote; 2765 continue; 2766 } 2767 } 2768 2769 literal += ch; 2770 } 2771 2772 // at this point, literal contains the literal text 2773 // and i is the index of the next non-literal pattern character. 2774 int32_t p; 2775 int32_t t = textOffset; 2776 2777 if (whitespaceLenient) { 2778 // trim leading, trailing whitespace from 2779 // the literal text 2780 literal.trim(); 2781 2782 // ignore any leading whitespace in the text 2783 while (t < text.length() && u_isWhitespace(text.charAt(t))) { 2784 t += 1; 2785 } 2786 } 2787 2788 for (p = 0; p < literal.length() && t < text.length();) { 2789 UBool needWhitespace = false; 2790 2791 while (p < literal.length() && PatternProps::isWhiteSpace(literal.charAt(p))) { 2792 needWhitespace = true; 2793 p += 1; 2794 } 2795 2796 if (needWhitespace) { 2797 int32_t tStart = t; 2798 2799 while (t < text.length()) { 2800 char16_t tch = text.charAt(t); 2801 2802 if (!u_isUWhiteSpace(tch) && !PatternProps::isWhiteSpace(tch)) { 2803 break; 2804 } 2805 2806 t += 1; 2807 } 2808 2809 // TODO: should we require internal spaces 2810 // in lenient mode? (There won't be any 2811 // leading or trailing spaces) 2812 if (!whitespaceLenient && t == tStart) { 2813 // didn't find matching whitespace: 2814 // an error in strict mode 2815 return false; 2816 } 2817 2818 // In strict mode, this run of whitespace 2819 // may have been at the end. 2820 if (p >= literal.length()) { 2821 break; 2822 } 2823 } 2824 if (t >= text.length() || literal.charAt(p) != text.charAt(t)) { 2825 // Ran out of text, or found a non-matching character: 2826 // OK in lenient mode, an error in strict mode. 2827 if (whitespaceLenient) { 2828 if (t == textOffset && text.charAt(t) == 0x2e && 2829 isAfterNonNumericField(pattern, patternOffset)) { 2830 // Lenient mode and the literal input text begins with a "." and 2831 // we are after a non-numeric field: We skip the "." 2832 ++t; 2833 continue; // Do not update p. 2834 } 2835 // if it is actual whitespace and we're whitespace lenient it's OK 2836 2837 char16_t wsc = text.charAt(t); 2838 if(PatternProps::isWhiteSpace(wsc)) { 2839 // Lenient mode and it's just whitespace we skip it 2840 ++t; 2841 continue; // Do not update p. 2842 } 2843 } 2844 // hack around oldleniency being a bit of a catch-all bucket and we're just adding support specifically for partial matches 2845 if(partialMatchLenient && oldLeniency) { 2846 break; 2847 } 2848 2849 return false; 2850 } 2851 ++p; 2852 ++t; 2853 } 2854 2855 // At this point if we're in strict mode we have a complete match. 2856 // If we're in lenient mode we may have a partial match, or no 2857 // match at all. 2858 if (p <= 0) { 2859 // no match. Pretend it matched a run of whitespace 2860 // and ignorables in the text. 2861 const UnicodeSet *ignorables = nullptr; 2862 UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(pattern.charAt(i)); 2863 if (patternCharIndex != UDAT_FIELD_COUNT) { 2864 ignorables = SimpleDateFormatStaticSets::getIgnorables(patternCharIndex); 2865 } 2866 2867 for (t = textOffset; t < text.length(); t += 1) { 2868 char16_t ch = text.charAt(t); 2869 2870 if (ignorables == nullptr || !ignorables->contains(ch)) { 2871 break; 2872 } 2873 } 2874 } 2875 2876 // if we get here, we've got a complete match. 2877 patternOffset = i - 1; 2878 textOffset = t; 2879 2880 return true; 2881 } 2882 2883 //---------------------------------------------------------------------- 2884 // check both wide and abbrev months. 2885 // Does not currently handle monthPattern. 2886 // UCalendarDateFields field = UCAL_MONTH 2887 2888 int32_t SimpleDateFormat::matchAlphaMonthStrings(const UnicodeString& text, 2889 int32_t start, 2890 const UnicodeString* wideData, 2891 const UnicodeString* shortData, 2892 int32_t dataCount, 2893 Calendar& cal) const 2894 { 2895 int32_t i; 2896 int32_t bestMatchLength = 0, bestMatch = -1; 2897 2898 for (i = 0; i < dataCount; ++i) { 2899 int32_t matchLen = 0; 2900 if ((matchLen = matchStringWithOptionalDot(text, start, wideData[i])) > bestMatchLength) { 2901 bestMatch = i; 2902 bestMatchLength = matchLen; 2903 } 2904 } 2905 for (i = 0; i < dataCount; ++i) { 2906 int32_t matchLen = 0; 2907 if ((matchLen = matchStringWithOptionalDot(text, start, shortData[i])) > bestMatchLength) { 2908 bestMatch = i; 2909 bestMatchLength = matchLen; 2910 } 2911 } 2912 2913 if (bestMatch >= 0) { 2914 // Adjustment for Hebrew Calendar month Adar II 2915 if (typeid(cal) == typeid(HebrewCalendar) && bestMatch==13) { 2916 cal.set(UCAL_MONTH,6); 2917 } else { 2918 cal.set(UCAL_MONTH, bestMatch); 2919 } 2920 return start + bestMatchLength; 2921 } 2922 2923 return -start; 2924 } 2925 2926 //---------------------------------------------------------------------- 2927 2928 int32_t SimpleDateFormat::matchString(const UnicodeString& text, 2929 int32_t start, 2930 UCalendarDateFields field, 2931 const UnicodeString* data, 2932 int32_t dataCount, 2933 const UnicodeString* monthPattern, 2934 Calendar& cal) const 2935 { 2936 int32_t i = 0; 2937 int32_t count = dataCount; 2938 2939 if (field == UCAL_DAY_OF_WEEK) i = 1; 2940 2941 // There may be multiple strings in the data[] array which begin with 2942 // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech). 2943 // We keep track of the longest match, and return that. Note that this 2944 // unfortunately requires us to test all array elements. 2945 // But this does not really work for cases such as Chuvash in which 2946 // May is "ҫу" and August is "ҫурла"/"ҫур.", hence matchAlphaMonthStrings. 2947 int32_t bestMatchLength = 0, bestMatch = -1; 2948 UnicodeString bestMatchName; 2949 int32_t isLeapMonth = 0; 2950 2951 for (; i < count; ++i) { 2952 int32_t matchLen = 0; 2953 if ((matchLen = matchStringWithOptionalDot(text, start, data[i])) > bestMatchLength) { 2954 bestMatch = i; 2955 bestMatchLength = matchLen; 2956 } 2957 2958 if (monthPattern != nullptr) { 2959 UErrorCode status = U_ZERO_ERROR; 2960 UnicodeString leapMonthName; 2961 SimpleFormatter(*monthPattern, 1, 1, status).format(data[i], leapMonthName, status); 2962 if (U_SUCCESS(status)) { 2963 if ((matchLen = matchStringWithOptionalDot(text, start, leapMonthName)) > bestMatchLength) { 2964 bestMatch = i; 2965 bestMatchLength = matchLen; 2966 isLeapMonth = 1; 2967 } 2968 } 2969 } 2970 } 2971 2972 if (bestMatch >= 0) { 2973 if (field < UCAL_FIELD_COUNT) { 2974 // Adjustment for Hebrew Calendar month Adar II 2975 if (typeid(cal) == typeid(HebrewCalendar) && field==UCAL_MONTH && bestMatch==13) { 2976 cal.set(field,6); 2977 } else { 2978 if (field == UCAL_YEAR) { 2979 bestMatch++; // only get here for cyclic year names, which match 1-based years 1-60 2980 } 2981 cal.set(field, bestMatch); 2982 } 2983 if (monthPattern != nullptr) { 2984 cal.set(UCAL_IS_LEAP_MONTH, isLeapMonth); 2985 } 2986 } 2987 2988 return start + bestMatchLength; 2989 } 2990 2991 return -start; 2992 } 2993 2994 static int32_t 2995 matchStringWithOptionalDot(const UnicodeString &text, 2996 int32_t index, 2997 const UnicodeString &data) { 2998 UErrorCode sts = U_ZERO_ERROR; 2999 int32_t matchLenText = 0; 3000 int32_t matchLenData = 0; 3001 3002 u_caseInsensitivePrefixMatch(text.getBuffer() + index, text.length() - index, 3003 data.getBuffer(), data.length(), 3004 0 /* default case option */, 3005 &matchLenText, &matchLenData, 3006 &sts); 3007 U_ASSERT (U_SUCCESS(sts)); 3008 3009 if (matchLenData == data.length() /* normal match */ 3010 || (data.charAt(data.length() - 1) == 0x2e 3011 && matchLenData == data.length() - 1 /* match without trailing dot */)) { 3012 return matchLenText; 3013 } 3014 3015 return 0; 3016 } 3017 3018 //---------------------------------------------------------------------- 3019 3020 void 3021 SimpleDateFormat::set2DigitYearStart(UDate d, UErrorCode& status) 3022 { 3023 parseAmbiguousDatesAsAfter(d, status); 3024 } 3025 3026 /** 3027 * Private member function that converts the parsed date strings into 3028 * timeFields. Returns -start (for ParsePosition) if failed. 3029 */ 3030 int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, char16_t ch, int32_t count, 3031 UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], int32_t& saveHebrewMonth, Calendar& cal, 3032 int32_t patLoc, MessageFormat * numericLeapMonthFormatter, UTimeZoneFormatTimeType *tzTimeType, 3033 int32_t *dayPeriod) const 3034 { 3035 Formattable number; 3036 int32_t value = 0; 3037 int32_t i; 3038 int32_t ps = 0; 3039 UErrorCode status = U_ZERO_ERROR; 3040 ParsePosition pos(0); 3041 UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(ch); 3042 const NumberFormat *currentNumberFormat; 3043 UnicodeString temp; 3044 UBool gotNumber = false; 3045 3046 #if defined (U_DEBUG_CAL) 3047 //fprintf(stderr, "%s:%d - [%c] st=%d \n", __FILE__, __LINE__, (char) ch, start); 3048 #endif 3049 3050 if (patternCharIndex == UDAT_FIELD_COUNT) { 3051 return -start; 3052 } 3053 3054 currentNumberFormat = getNumberFormatByIndex(patternCharIndex); 3055 if (currentNumberFormat == nullptr) { 3056 return -start; 3057 } 3058 UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex]; // UCAL_FIELD_COUNT if irrelevant 3059 UnicodeString hebr("hebr", 4, US_INV); 3060 3061 if (numericLeapMonthFormatter != nullptr) { 3062 numericLeapMonthFormatter->setFormats(reinterpret_cast<const Format**>(¤tNumberFormat), 1); 3063 } 3064 3065 // If there are any spaces here, skip over them. If we hit the end 3066 // of the string, then fail. 3067 for (;;) { 3068 if (start >= text.length()) { 3069 return -start; 3070 } 3071 UChar32 c = text.char32At(start); 3072 if (!u_isUWhiteSpace(c) /*||*/ && !PatternProps::isWhiteSpace(c)) { 3073 break; 3074 } 3075 start += U16_LENGTH(c); 3076 } 3077 pos.setIndex(start); 3078 3079 UBool isChineseCalendar = typeid(cal) == typeid(ChineseCalendar) || 3080 typeid(cal) == typeid(DangiCalendar); 3081 // We handle a few special cases here where we need to parse 3082 // a number value. We handle further, more generic cases below. We need 3083 // to handle some of them here because some fields require extra processing on 3084 // the parsed value. 3085 if (patternCharIndex == UDAT_HOUR_OF_DAY1_FIELD || // k 3086 patternCharIndex == UDAT_HOUR_OF_DAY0_FIELD || // H 3087 patternCharIndex == UDAT_HOUR1_FIELD || // h 3088 patternCharIndex == UDAT_HOUR0_FIELD || // K 3089 (patternCharIndex == UDAT_DOW_LOCAL_FIELD && count <= 2) || // e 3090 (patternCharIndex == UDAT_STANDALONE_DAY_FIELD && count <= 2) || // c 3091 (patternCharIndex == UDAT_MONTH_FIELD && count <= 2) || // M 3092 (patternCharIndex == UDAT_STANDALONE_MONTH_FIELD && count <= 2) || // L 3093 (patternCharIndex == UDAT_QUARTER_FIELD && count <= 2) || // Q 3094 (patternCharIndex == UDAT_STANDALONE_QUARTER_FIELD && count <= 2) || // q 3095 patternCharIndex == UDAT_YEAR_FIELD || // y 3096 patternCharIndex == UDAT_YEAR_WOY_FIELD || // Y 3097 patternCharIndex == UDAT_YEAR_NAME_FIELD || // U (falls back to numeric) 3098 (patternCharIndex == UDAT_ERA_FIELD && isChineseCalendar) || // G 3099 patternCharIndex == UDAT_FRACTIONAL_SECOND_FIELD) // S 3100 { 3101 int32_t parseStart = pos.getIndex(); 3102 // It would be good to unify this with the obeyCount logic below, 3103 // but that's going to be difficult. 3104 const UnicodeString* src; 3105 3106 UBool parsedNumericLeapMonth = false; 3107 if (numericLeapMonthFormatter != nullptr && (patternCharIndex == UDAT_MONTH_FIELD || patternCharIndex == UDAT_STANDALONE_MONTH_FIELD)) { 3108 int32_t argCount; 3109 Formattable * args = numericLeapMonthFormatter->parse(text, pos, argCount); 3110 if (args != nullptr && argCount == 1 && pos.getIndex() > parseStart && args[0].isNumeric()) { 3111 parsedNumericLeapMonth = true; 3112 number.setLong(args[0].getLong()); 3113 cal.set(UCAL_IS_LEAP_MONTH, 1); 3114 delete[] args; 3115 } else { 3116 pos.setIndex(parseStart); 3117 cal.set(UCAL_IS_LEAP_MONTH, 0); 3118 } 3119 } 3120 3121 if (!parsedNumericLeapMonth) { 3122 if (obeyCount) { 3123 if ((start+count) > text.length()) { 3124 return -start; 3125 } 3126 3127 text.extractBetween(0, start + count, temp); 3128 src = &temp; 3129 } else { 3130 src = &text; 3131 } 3132 3133 parseInt(*src, number, pos, allowNegative,currentNumberFormat); 3134 } 3135 3136 int32_t txtLoc = pos.getIndex(); 3137 3138 if (txtLoc > parseStart) { 3139 value = number.getLong(); 3140 gotNumber = true; 3141 3142 // suffix processing 3143 if (value < 0 ) { 3144 txtLoc = checkIntSuffix(text, txtLoc, patLoc+1, true); 3145 if (txtLoc != pos.getIndex()) { 3146 value *= -1; 3147 } 3148 } 3149 else { 3150 txtLoc = checkIntSuffix(text, txtLoc, patLoc+1, false); 3151 } 3152 3153 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status)) { 3154 // Check the range of the value 3155 int32_t bias = gFieldRangeBias[patternCharIndex]; 3156 if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) { 3157 return -start; 3158 } 3159 } 3160 3161 pos.setIndex(txtLoc); 3162 } 3163 } 3164 3165 // Make sure that we got a number if 3166 // we want one, and didn't get one 3167 // if we don't want one. 3168 switch (patternCharIndex) { 3169 case UDAT_HOUR_OF_DAY1_FIELD: 3170 case UDAT_HOUR_OF_DAY0_FIELD: 3171 case UDAT_HOUR1_FIELD: 3172 case UDAT_HOUR0_FIELD: 3173 // special range check for hours: 3174 if (value < 0 || value > 24) { 3175 return -start; 3176 } 3177 3178 // fall through to gotNumber check 3179 U_FALLTHROUGH; 3180 case UDAT_YEAR_FIELD: 3181 case UDAT_YEAR_WOY_FIELD: 3182 case UDAT_FRACTIONAL_SECOND_FIELD: 3183 // these must be a number 3184 if (! gotNumber) { 3185 return -start; 3186 } 3187 3188 break; 3189 3190 default: 3191 // we check the rest of the fields below. 3192 break; 3193 } 3194 3195 switch (patternCharIndex) { 3196 case UDAT_ERA_FIELD: 3197 if (isChineseCalendar) { 3198 if (!gotNumber) { 3199 return -start; 3200 } 3201 cal.set(UCAL_ERA, value); 3202 return pos.getIndex(); 3203 } 3204 if (count == 5) { 3205 ps = matchString(text, start, UCAL_ERA, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount, nullptr, cal); 3206 } else if (count == 4) { 3207 ps = matchString(text, start, UCAL_ERA, fSymbols->fEraNames, fSymbols->fEraNamesCount, nullptr, cal); 3208 } else { 3209 ps = matchString(text, start, UCAL_ERA, fSymbols->fEras, fSymbols->fErasCount, nullptr, cal); 3210 } 3211 3212 // check return position, if it equals -start, then matchString error 3213 // special case the return code so we don't necessarily fail out until we 3214 // verify no year information also 3215 if (ps == -start) 3216 ps--; 3217 3218 return ps; 3219 3220 case UDAT_YEAR_FIELD: 3221 // If there are 3 or more YEAR pattern characters, this indicates 3222 // that the year value is to be treated literally, without any 3223 // two-digit year adjustments (e.g., from "01" to 2001). Otherwise 3224 // we made adjustments to place the 2-digit year in the proper 3225 // century, for parsed strings from "00" to "99". Any other string 3226 // is treated literally: "2250", "-1", "1", "002". 3227 if (fDateOverride.compare(hebr)==0 && value < 1000) { 3228 value += HEBREW_CAL_CUR_MILLENIUM_START_YEAR; 3229 } else if (text.moveIndex32(start, 2) == pos.getIndex() && !isChineseCalendar 3230 && u_isdigit(text.char32At(start)) 3231 && u_isdigit(text.char32At(text.moveIndex32(start, 1)))) 3232 { 3233 // only adjust year for patterns less than 3. 3234 if(count < 3) { 3235 // Assume for example that the defaultCenturyStart is 6/18/1903. 3236 // This means that two-digit years will be forced into the range 3237 // 6/18/1903 to 6/17/2003. As a result, years 00, 01, and 02 3238 // correspond to 2000, 2001, and 2002. Years 04, 05, etc. correspond 3239 // to 1904, 1905, etc. If the year is 03, then it is 2003 if the 3240 // other fields specify a date before 6/18, or 1903 if they specify a 3241 // date afterwards. As a result, 03 is an ambiguous year. All other 3242 // two-digit years are unambiguous. 3243 if(fHaveDefaultCentury) { // check if this formatter even has a pivot year 3244 int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100; 3245 ambiguousYear[0] = (value == ambiguousTwoDigitYear); 3246 value += (fDefaultCenturyStartYear/100)*100 + 3247 (value < ambiguousTwoDigitYear ? 100 : 0); 3248 } 3249 } 3250 } 3251 cal.set(UCAL_YEAR, value); 3252 3253 // Delayed checking for adjustment of Hebrew month numbers in non-leap years. 3254 if (saveHebrewMonth >= 0) { 3255 HebrewCalendar *hc = (HebrewCalendar*)&cal; 3256 if (!hc->isLeapYear(value) && saveHebrewMonth >= 6) { 3257 cal.set(UCAL_MONTH,saveHebrewMonth); 3258 } else { 3259 cal.set(UCAL_MONTH,saveHebrewMonth-1); 3260 } 3261 saveHebrewMonth = -1; 3262 } 3263 return pos.getIndex(); 3264 3265 case UDAT_YEAR_WOY_FIELD: 3266 // Comment is the same as for UDAT_Year_FIELDs - look above 3267 if (fDateOverride.compare(hebr)==0 && value < 1000) { 3268 value += HEBREW_CAL_CUR_MILLENIUM_START_YEAR; 3269 } else if (text.moveIndex32(start, 2) == pos.getIndex() 3270 && u_isdigit(text.char32At(start)) 3271 && u_isdigit(text.char32At(text.moveIndex32(start, 1))) 3272 && fHaveDefaultCentury ) 3273 { 3274 int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100; 3275 ambiguousYear[0] = (value == ambiguousTwoDigitYear); 3276 value += (fDefaultCenturyStartYear/100)*100 + 3277 (value < ambiguousTwoDigitYear ? 100 : 0); 3278 } 3279 cal.set(UCAL_YEAR_WOY, value); 3280 return pos.getIndex(); 3281 3282 case UDAT_YEAR_NAME_FIELD: 3283 if (fSymbols->fShortYearNames != nullptr) { 3284 int32_t newStart = matchString(text, start, UCAL_YEAR, fSymbols->fShortYearNames, fSymbols->fShortYearNamesCount, nullptr, cal); 3285 if (newStart > 0) { 3286 return newStart; 3287 } 3288 } 3289 if (gotNumber && (getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC,status) || value > fSymbols->fShortYearNamesCount)) { 3290 cal.set(UCAL_YEAR, value); 3291 return pos.getIndex(); 3292 } 3293 return -start; 3294 3295 case UDAT_MONTH_FIELD: 3296 case UDAT_STANDALONE_MONTH_FIELD: 3297 if (gotNumber) // i.e., M or MM. 3298 { 3299 // When parsing month numbers from the Hebrew Calendar, we might need to adjust the month depending on whether 3300 // or not it was a leap year. We may or may not yet know what year it is, so might have to delay checking until 3301 // the year is parsed. 3302 if (typeid(cal) == typeid(HebrewCalendar)) { 3303 HebrewCalendar *hc = (HebrewCalendar*)&cal; 3304 if (cal.isSet(UCAL_YEAR)) { 3305 UErrorCode monthStatus = U_ZERO_ERROR; 3306 if (!hc->isLeapYear(hc->get(UCAL_YEAR, monthStatus)) && value >= 6) { 3307 cal.set(UCAL_MONTH, value); 3308 } else { 3309 cal.set(UCAL_MONTH, value - 1); 3310 } 3311 } else { 3312 saveHebrewMonth = value; 3313 } 3314 } else { 3315 // Don't want to parse the month if it is a string 3316 // while pattern uses numeric style: M/MM, L/LL 3317 // [We computed 'value' above.] 3318 cal.set(UCAL_MONTH, value - 1); 3319 } 3320 return pos.getIndex(); 3321 } else { 3322 // count >= 3 // i.e., MMM/MMMM, LLL/LLLL 3323 // Want to be able to parse both short and long forms. 3324 // Try count == 4 first: 3325 UnicodeString * wideMonthPat = nullptr; 3326 UnicodeString * shortMonthPat = nullptr; 3327 if (fSymbols->fLeapMonthPatterns != nullptr && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount) { 3328 if (patternCharIndex==UDAT_MONTH_FIELD) { 3329 wideMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatWide]; 3330 shortMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatAbbrev]; 3331 } else { 3332 wideMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneWide]; 3333 shortMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneAbbrev]; 3334 } 3335 } 3336 int32_t newStart = 0; 3337 if (patternCharIndex==UDAT_MONTH_FIELD) { 3338 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) && count>=3 && count <=4 && 3339 fSymbols->fLeapMonthPatterns==nullptr && fSymbols->fMonthsCount==fSymbols->fShortMonthsCount) { 3340 // single function to check both wide and short, an experiment 3341 newStart = matchAlphaMonthStrings(text, start, fSymbols->fMonths, fSymbols->fShortMonths, fSymbols->fMonthsCount, cal); // try MMMM,MMM 3342 if (newStart > 0) { 3343 return newStart; 3344 } 3345 } 3346 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) { 3347 newStart = matchString(text, start, UCAL_MONTH, fSymbols->fMonths, fSymbols->fMonthsCount, wideMonthPat, cal); // try MMMM 3348 if (newStart > 0) { 3349 return newStart; 3350 } 3351 } 3352 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) { 3353 newStart = matchString(text, start, UCAL_MONTH, fSymbols->fShortMonths, fSymbols->fShortMonthsCount, shortMonthPat, cal); // try MMM 3354 } 3355 } else { 3356 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) && count>=3 && count <=4 && 3357 fSymbols->fLeapMonthPatterns==nullptr && fSymbols->fStandaloneMonthsCount==fSymbols->fStandaloneShortMonthsCount) { 3358 // single function to check both wide and short, an experiment 3359 newStart = matchAlphaMonthStrings(text, start, fSymbols->fStandaloneMonths, fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneMonthsCount, cal); // try MMMM,MMM 3360 if (newStart > 0) { 3361 return newStart; 3362 } 3363 } 3364 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) { 3365 newStart = matchString(text, start, UCAL_MONTH, fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, wideMonthPat, cal); // try LLLL 3366 if (newStart > 0) { 3367 return newStart; 3368 } 3369 } 3370 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) { 3371 newStart = matchString(text, start, UCAL_MONTH, fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, shortMonthPat, cal); // try LLL 3372 } 3373 } 3374 if (newStart > 0 || !getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) // currently we do not try to parse MMMMM/LLLLL: #8860 3375 return newStart; 3376 // else we allowing parsing as number, below 3377 } 3378 break; 3379 3380 case UDAT_HOUR_OF_DAY1_FIELD: 3381 // [We computed 'value' above.] 3382 if (value == cal.getMaximum(UCAL_HOUR_OF_DAY) + 1) 3383 value = 0; 3384 3385 // fall through to set field 3386 U_FALLTHROUGH; 3387 case UDAT_HOUR_OF_DAY0_FIELD: 3388 cal.set(UCAL_HOUR_OF_DAY, value); 3389 return pos.getIndex(); 3390 3391 case UDAT_FRACTIONAL_SECOND_FIELD: 3392 // Fractional seconds left-justify 3393 i = countDigits(text, start, pos.getIndex()); 3394 if (i < 3) { 3395 while (i < 3) { 3396 value *= 10; 3397 i++; 3398 } 3399 } else { 3400 int32_t a = 1; 3401 while (i > 3) { 3402 a *= 10; 3403 i--; 3404 } 3405 value /= a; 3406 } 3407 cal.set(UCAL_MILLISECOND, value); 3408 return pos.getIndex(); 3409 3410 case UDAT_DOW_LOCAL_FIELD: 3411 if (gotNumber) // i.e., e or ee 3412 { 3413 // [We computed 'value' above.] 3414 cal.set(UCAL_DOW_LOCAL, value); 3415 return pos.getIndex(); 3416 } 3417 // else for eee-eeeee fall through to handling of EEE-EEEEE 3418 // fall through, do not break here 3419 U_FALLTHROUGH; 3420 case UDAT_DAY_OF_WEEK_FIELD: 3421 { 3422 // Want to be able to parse both short and long forms. 3423 // Try count == 4 (EEEE) wide first: 3424 int32_t newStart = 0; 3425 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) { 3426 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, 3427 fSymbols->fWeekdays, fSymbols->fWeekdaysCount, nullptr, cal)) > 0) 3428 return newStart; 3429 } 3430 // EEEE wide failed, now try EEE abbreviated 3431 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) { 3432 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, 3433 fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount, nullptr, cal)) > 0) 3434 return newStart; 3435 } 3436 // EEE abbreviated failed, now try EEEEEE short 3437 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 6) { 3438 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, 3439 fSymbols->fShorterWeekdays, fSymbols->fShorterWeekdaysCount, nullptr, cal)) > 0) 3440 return newStart; 3441 } 3442 // EEEEEE short failed, now try EEEEE narrow 3443 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) { 3444 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, 3445 fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, nullptr, cal)) > 0) 3446 return newStart; 3447 } 3448 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status) || patternCharIndex == UDAT_DAY_OF_WEEK_FIELD) 3449 return newStart; 3450 // else we allowing parsing as number, below 3451 } 3452 break; 3453 3454 case UDAT_STANDALONE_DAY_FIELD: 3455 { 3456 if (gotNumber) // c or cc 3457 { 3458 // [We computed 'value' above.] 3459 cal.set(UCAL_DOW_LOCAL, value); 3460 return pos.getIndex(); 3461 } 3462 // Want to be able to parse both short and long forms. 3463 // Try count == 4 (cccc) first: 3464 int32_t newStart = 0; 3465 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) { 3466 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, 3467 fSymbols->fStandaloneWeekdays, fSymbols->fStandaloneWeekdaysCount, nullptr, cal)) > 0) 3468 return newStart; 3469 } 3470 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) { 3471 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, 3472 fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, nullptr, cal)) > 0) 3473 return newStart; 3474 } 3475 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 6) { 3476 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, 3477 fSymbols->fStandaloneShorterWeekdays, fSymbols->fStandaloneShorterWeekdaysCount, nullptr, cal)) > 0) 3478 return newStart; 3479 } 3480 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) 3481 return newStart; 3482 // else we allowing parsing as number, below 3483 } 3484 break; 3485 3486 case UDAT_AM_PM_FIELD: 3487 { 3488 // optionally try both wide/abbrev and narrow forms 3489 int32_t newStart = 0; 3490 // try wide 3491 if( getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4 ) { 3492 if ((newStart = matchString(text, start, UCAL_AM_PM, fSymbols->fWideAmPms, fSymbols->fWideAmPmsCount, nullptr, cal)) > 0) { 3493 return newStart; 3494 } 3495 } 3496 // try abbreviated 3497 if( getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count <= 3 ) { 3498 if ((newStart = matchString(text, start, UCAL_AM_PM, fSymbols->fAmPms, fSymbols->fAmPmsCount, nullptr, cal)) > 0) { 3499 return newStart; 3500 } 3501 } 3502 // try narrow 3503 if( getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count >= 5 ) { 3504 if ((newStart = matchString(text, start, UCAL_AM_PM, fSymbols->fNarrowAmPms, fSymbols->fNarrowAmPmsCount, nullptr, cal)) > 0) { 3505 return newStart; 3506 } 3507 } 3508 // no matches for given options 3509 return -start; 3510 } 3511 3512 case UDAT_HOUR1_FIELD: 3513 // [We computed 'value' above.] 3514 if (value == cal.getLeastMaximum(UCAL_HOUR)+1) 3515 value = 0; 3516 3517 // fall through to set field 3518 U_FALLTHROUGH; 3519 case UDAT_HOUR0_FIELD: 3520 cal.set(UCAL_HOUR, value); 3521 return pos.getIndex(); 3522 3523 case UDAT_QUARTER_FIELD: 3524 if (gotNumber) // i.e., Q or QQ. 3525 { 3526 // Don't want to parse the month if it is a string 3527 // while pattern uses numeric style: Q or QQ. 3528 // [We computed 'value' above.] 3529 cal.set(UCAL_MONTH, (value - 1) * 3); 3530 return pos.getIndex(); 3531 } else { 3532 // count >= 3 // i.e., QQQ or QQQQ 3533 // Want to be able to parse short, long, and narrow forms. 3534 // Try count == 4 first: 3535 int32_t newStart = 0; 3536 3537 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) { 3538 if ((newStart = matchQuarterString(text, start, UCAL_MONTH, 3539 fSymbols->fQuarters, fSymbols->fQuartersCount, cal)) > 0) 3540 return newStart; 3541 } 3542 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) { 3543 if ((newStart = matchQuarterString(text, start, UCAL_MONTH, 3544 fSymbols->fShortQuarters, fSymbols->fShortQuartersCount, cal)) > 0) 3545 return newStart; 3546 } 3547 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) { 3548 if ((newStart = matchQuarterString(text, start, UCAL_MONTH, 3549 fSymbols->fNarrowQuarters, fSymbols->fNarrowQuartersCount, cal)) > 0) 3550 return newStart; 3551 } 3552 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) 3553 return newStart; 3554 // else we allowing parsing as number, below 3555 if(!getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status)) 3556 return -start; 3557 } 3558 break; 3559 3560 case UDAT_STANDALONE_QUARTER_FIELD: 3561 if (gotNumber) // i.e., q or qq. 3562 { 3563 // Don't want to parse the month if it is a string 3564 // while pattern uses numeric style: q or q. 3565 // [We computed 'value' above.] 3566 cal.set(UCAL_MONTH, (value - 1) * 3); 3567 return pos.getIndex(); 3568 } else { 3569 // count >= 3 // i.e., qqq or qqqq 3570 // Want to be able to parse both short and long forms. 3571 // Try count == 4 first: 3572 int32_t newStart = 0; 3573 3574 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) { 3575 if ((newStart = matchQuarterString(text, start, UCAL_MONTH, 3576 fSymbols->fStandaloneQuarters, fSymbols->fStandaloneQuartersCount, cal)) > 0) 3577 return newStart; 3578 } 3579 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) { 3580 if ((newStart = matchQuarterString(text, start, UCAL_MONTH, 3581 fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount, cal)) > 0) 3582 return newStart; 3583 } 3584 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) { 3585 if ((newStart = matchQuarterString(text, start, UCAL_MONTH, 3586 fSymbols->fStandaloneNarrowQuarters, fSymbols->fStandaloneNarrowQuartersCount, cal)) > 0) 3587 return newStart; 3588 } 3589 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) 3590 return newStart; 3591 // else we allowing parsing as number, below 3592 if(!getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status)) 3593 return -start; 3594 } 3595 break; 3596 3597 case UDAT_TIMEZONE_FIELD: // 'z' 3598 { 3599 UTimeZoneFormatStyle style = (count < 4) ? UTZFMT_STYLE_SPECIFIC_SHORT : UTZFMT_STYLE_SPECIFIC_LONG; 3600 const TimeZoneFormat *tzfmt = tzFormat(status); 3601 if (U_SUCCESS(status)) { 3602 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType); 3603 if (tz != nullptr) { 3604 cal.adoptTimeZone(tz); 3605 return pos.getIndex(); 3606 } 3607 } 3608 return -start; 3609 } 3610 break; 3611 case UDAT_TIMEZONE_RFC_FIELD: // 'Z' 3612 { 3613 UTimeZoneFormatStyle style = (count < 4) ? 3614 UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL : ((count == 5) ? UTZFMT_STYLE_ISO_EXTENDED_FULL: UTZFMT_STYLE_LOCALIZED_GMT); 3615 const TimeZoneFormat *tzfmt = tzFormat(status); 3616 if (U_SUCCESS(status)) { 3617 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType); 3618 if (tz != nullptr) { 3619 cal.adoptTimeZone(tz); 3620 return pos.getIndex(); 3621 } 3622 } 3623 return -start; 3624 } 3625 case UDAT_TIMEZONE_GENERIC_FIELD: // 'v' 3626 { 3627 UTimeZoneFormatStyle style = (count < 4) ? UTZFMT_STYLE_GENERIC_SHORT : UTZFMT_STYLE_GENERIC_LONG; 3628 const TimeZoneFormat *tzfmt = tzFormat(status); 3629 if (U_SUCCESS(status)) { 3630 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType); 3631 if (tz != nullptr) { 3632 cal.adoptTimeZone(tz); 3633 return pos.getIndex(); 3634 } 3635 } 3636 return -start; 3637 } 3638 case UDAT_TIMEZONE_SPECIAL_FIELD: // 'V' 3639 { 3640 UTimeZoneFormatStyle style; 3641 switch (count) { 3642 case 1: 3643 style = UTZFMT_STYLE_ZONE_ID_SHORT; 3644 break; 3645 case 2: 3646 style = UTZFMT_STYLE_ZONE_ID; 3647 break; 3648 case 3: 3649 style = UTZFMT_STYLE_EXEMPLAR_LOCATION; 3650 break; 3651 default: 3652 style = UTZFMT_STYLE_GENERIC_LOCATION; 3653 break; 3654 } 3655 const TimeZoneFormat *tzfmt = tzFormat(status); 3656 if (U_SUCCESS(status)) { 3657 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType); 3658 if (tz != nullptr) { 3659 cal.adoptTimeZone(tz); 3660 return pos.getIndex(); 3661 } 3662 } 3663 return -start; 3664 } 3665 case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD: // 'O' 3666 { 3667 UTimeZoneFormatStyle style = (count < 4) ? UTZFMT_STYLE_LOCALIZED_GMT_SHORT : UTZFMT_STYLE_LOCALIZED_GMT; 3668 const TimeZoneFormat *tzfmt = tzFormat(status); 3669 if (U_SUCCESS(status)) { 3670 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType); 3671 if (tz != nullptr) { 3672 cal.adoptTimeZone(tz); 3673 return pos.getIndex(); 3674 } 3675 } 3676 return -start; 3677 } 3678 case UDAT_TIMEZONE_ISO_FIELD: // 'X' 3679 { 3680 UTimeZoneFormatStyle style; 3681 switch (count) { 3682 case 1: 3683 style = UTZFMT_STYLE_ISO_BASIC_SHORT; 3684 break; 3685 case 2: 3686 style = UTZFMT_STYLE_ISO_BASIC_FIXED; 3687 break; 3688 case 3: 3689 style = UTZFMT_STYLE_ISO_EXTENDED_FIXED; 3690 break; 3691 case 4: 3692 style = UTZFMT_STYLE_ISO_BASIC_FULL; 3693 break; 3694 default: 3695 style = UTZFMT_STYLE_ISO_EXTENDED_FULL; 3696 break; 3697 } 3698 const TimeZoneFormat *tzfmt = tzFormat(status); 3699 if (U_SUCCESS(status)) { 3700 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType); 3701 if (tz != nullptr) { 3702 cal.adoptTimeZone(tz); 3703 return pos.getIndex(); 3704 } 3705 } 3706 return -start; 3707 } 3708 case UDAT_TIMEZONE_ISO_LOCAL_FIELD: // 'x' 3709 { 3710 UTimeZoneFormatStyle style; 3711 switch (count) { 3712 case 1: 3713 style = UTZFMT_STYLE_ISO_BASIC_LOCAL_SHORT; 3714 break; 3715 case 2: 3716 style = UTZFMT_STYLE_ISO_BASIC_LOCAL_FIXED; 3717 break; 3718 case 3: 3719 style = UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FIXED; 3720 break; 3721 case 4: 3722 style = UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL; 3723 break; 3724 default: 3725 style = UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FULL; 3726 break; 3727 } 3728 const TimeZoneFormat *tzfmt = tzFormat(status); 3729 if (U_SUCCESS(status)) { 3730 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType); 3731 if (tz != nullptr) { 3732 cal.adoptTimeZone(tz); 3733 return pos.getIndex(); 3734 } 3735 } 3736 return -start; 3737 } 3738 // currently no pattern character is defined for UDAT_TIME_SEPARATOR_FIELD 3739 // so we should not get here. Leave support in for future definition. 3740 case UDAT_TIME_SEPARATOR_FIELD: 3741 { 3742 static const char16_t def_sep = DateFormatSymbols::DEFAULT_TIME_SEPARATOR; 3743 static const char16_t alt_sep = DateFormatSymbols::ALTERNATE_TIME_SEPARATOR; 3744 3745 // Try matching a time separator. 3746 int32_t count_sep = 1; 3747 UnicodeString data[3]; 3748 fSymbols->getTimeSeparatorString(data[0]); 3749 3750 // Add the default, if different from the locale. 3751 if (data[0].compare(&def_sep, 1) != 0) { 3752 data[count_sep++].setTo(def_sep); 3753 } 3754 3755 // If lenient, add also the alternate, if different from the locale. 3756 if (isLenient() && data[0].compare(&alt_sep, 1) != 0) { 3757 data[count_sep++].setTo(alt_sep); 3758 } 3759 3760 return matchString(text, start, UCAL_FIELD_COUNT /* => nothing to set */, data, count_sep, nullptr, cal); 3761 } 3762 3763 case UDAT_AM_PM_MIDNIGHT_NOON_FIELD: 3764 { 3765 U_ASSERT(dayPeriod != nullptr); 3766 int32_t ampmStart = subParse(text, start, 0x61, count, 3767 obeyCount, allowNegative, ambiguousYear, saveHebrewMonth, cal, 3768 patLoc, numericLeapMonthFormatter, tzTimeType); 3769 3770 if (ampmStart > 0) { 3771 return ampmStart; 3772 } else { 3773 int32_t newStart = 0; 3774 3775 // Only match the first two strings from the day period strings array. 3776 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) { 3777 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fAbbreviatedDayPeriods, 3778 2, *dayPeriod)) > 0) { 3779 return newStart; 3780 } 3781 } 3782 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) { 3783 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fNarrowDayPeriods, 3784 2, *dayPeriod)) > 0) { 3785 return newStart; 3786 } 3787 } 3788 // count == 4, but allow other counts 3789 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status)) { 3790 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fWideDayPeriods, 3791 2, *dayPeriod)) > 0) { 3792 return newStart; 3793 } 3794 } 3795 3796 return -start; 3797 } 3798 } 3799 3800 case UDAT_FLEXIBLE_DAY_PERIOD_FIELD: 3801 { 3802 U_ASSERT(dayPeriod != nullptr); 3803 int32_t newStart = 0; 3804 3805 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) { 3806 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fAbbreviatedDayPeriods, 3807 fSymbols->fAbbreviatedDayPeriodsCount, *dayPeriod)) > 0) { 3808 return newStart; 3809 } 3810 } 3811 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) { 3812 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fNarrowDayPeriods, 3813 fSymbols->fNarrowDayPeriodsCount, *dayPeriod)) > 0) { 3814 return newStart; 3815 } 3816 } 3817 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) { 3818 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fWideDayPeriods, 3819 fSymbols->fWideDayPeriodsCount, *dayPeriod)) > 0) { 3820 return newStart; 3821 } 3822 } 3823 3824 return -start; 3825 } 3826 3827 default: 3828 // Handle "generic" fields 3829 // this is now handled below, outside the switch block 3830 break; 3831 } 3832 // Handle "generic" fields: 3833 // switch default case now handled here (outside switch block) to allow 3834 // parsing of some string fields as digits for lenient case 3835 3836 int32_t parseStart = pos.getIndex(); 3837 const UnicodeString* src; 3838 if (obeyCount) { 3839 if ((start+count) > text.length()) { 3840 return -start; 3841 } 3842 text.extractBetween(0, start + count, temp); 3843 src = &temp; 3844 } else { 3845 src = &text; 3846 } 3847 parseInt(*src, number, pos, allowNegative,currentNumberFormat); 3848 if (obeyCount && !isLenient() && pos.getIndex() < start + count) { 3849 return -start; 3850 } 3851 if (pos.getIndex() != parseStart) { 3852 int32_t val = number.getLong(); 3853 3854 // Don't need suffix processing here (as in number processing at the beginning of the function); 3855 // the new fields being handled as numeric values (month, weekdays, quarters) should not have suffixes. 3856 3857 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) { 3858 // Check the range of the value 3859 int32_t bias = gFieldRangeBias[patternCharIndex]; 3860 if (bias >= 0 && (val > cal.getMaximum(field) + bias || val < cal.getMinimum(field) + bias)) { 3861 return -start; 3862 } 3863 } 3864 3865 // For the following, need to repeat some of the "if (gotNumber)" code above: 3866 // UDAT_[STANDALONE_]MONTH_FIELD, UDAT_DOW_LOCAL_FIELD, UDAT_STANDALONE_DAY_FIELD, 3867 // UDAT_[STANDALONE_]QUARTER_FIELD 3868 switch (patternCharIndex) { 3869 case UDAT_MONTH_FIELD: 3870 // See notes under UDAT_MONTH_FIELD case above 3871 if (typeid(cal) == typeid(HebrewCalendar)) { 3872 HebrewCalendar *hc = (HebrewCalendar*)&cal; 3873 if (cal.isSet(UCAL_YEAR)) { 3874 UErrorCode monthStatus = U_ZERO_ERROR; 3875 if (!hc->isLeapYear(hc->get(UCAL_YEAR, monthStatus)) && val >= 6) { 3876 cal.set(UCAL_MONTH, val); 3877 } else { 3878 cal.set(UCAL_MONTH, val - 1); 3879 } 3880 } else { 3881 saveHebrewMonth = val; 3882 } 3883 } else { 3884 cal.set(UCAL_MONTH, val - 1); 3885 } 3886 break; 3887 case UDAT_STANDALONE_MONTH_FIELD: 3888 cal.set(UCAL_MONTH, val - 1); 3889 break; 3890 case UDAT_DOW_LOCAL_FIELD: 3891 case UDAT_STANDALONE_DAY_FIELD: 3892 cal.set(UCAL_DOW_LOCAL, val); 3893 break; 3894 case UDAT_QUARTER_FIELD: 3895 case UDAT_STANDALONE_QUARTER_FIELD: 3896 cal.set(UCAL_MONTH, (val - 1) * 3); 3897 break; 3898 case UDAT_RELATED_YEAR_FIELD: 3899 cal.setRelatedYear(val); 3900 break; 3901 default: 3902 cal.set(field, val); 3903 break; 3904 } 3905 return pos.getIndex(); 3906 } 3907 return -start; 3908 } 3909 3910 /** 3911 * Parse an integer using fNumberFormat. This method is semantically 3912 * const, but actually may modify fNumberFormat. 3913 */ 3914 void SimpleDateFormat::parseInt(const UnicodeString& text, 3915 Formattable& number, 3916 ParsePosition& pos, 3917 UBool allowNegative, 3918 const NumberFormat *fmt) const { 3919 parseInt(text, number, -1, pos, allowNegative,fmt); 3920 } 3921 3922 /** 3923 * Parse an integer using fNumberFormat up to maxDigits. 3924 */ 3925 void SimpleDateFormat::parseInt(const UnicodeString& text, 3926 Formattable& number, 3927 int32_t maxDigits, 3928 ParsePosition& pos, 3929 UBool allowNegative, 3930 const NumberFormat *fmt) const { 3931 UnicodeString oldPrefix; 3932 const auto* fmtAsDF = dynamic_cast<const DecimalFormat*>(fmt); 3933 LocalPointer<DecimalFormat> df; 3934 if (!allowNegative && fmtAsDF != nullptr) { 3935 df.adoptInstead(fmtAsDF->clone()); 3936 if (df.isNull()) { 3937 // Memory allocation error 3938 return; 3939 } 3940 df->setNegativePrefix(UnicodeString(true, SUPPRESS_NEGATIVE_PREFIX, -1)); 3941 fmt = df.getAlias(); 3942 } 3943 int32_t oldPos = pos.getIndex(); 3944 fmt->parse(text, number, pos); 3945 3946 if (maxDigits > 0) { 3947 // adjust the result to fit into 3948 // the maxDigits and move the position back 3949 int32_t nDigits = pos.getIndex() - oldPos; 3950 if (nDigits > maxDigits) { 3951 int32_t val = number.getLong(); 3952 nDigits -= maxDigits; 3953 while (nDigits > 0) { 3954 val /= 10; 3955 nDigits--; 3956 } 3957 pos.setIndex(oldPos + maxDigits); 3958 number.setLong(val); 3959 } 3960 } 3961 } 3962 3963 int32_t SimpleDateFormat::countDigits(const UnicodeString& text, int32_t start, int32_t end) const { 3964 int32_t numDigits = 0; 3965 int32_t idx = start; 3966 while (idx < end) { 3967 UChar32 cp = text.char32At(idx); 3968 if (u_isdigit(cp)) { 3969 numDigits++; 3970 } 3971 idx += U16_LENGTH(cp); 3972 } 3973 return numDigits; 3974 } 3975 3976 //---------------------------------------------------------------------- 3977 3978 void SimpleDateFormat::translatePattern(const UnicodeString& originalPattern, 3979 UnicodeString& translatedPattern, 3980 const UnicodeString& from, 3981 const UnicodeString& to, 3982 UErrorCode& status) 3983 { 3984 // run through the pattern and convert any pattern symbols from the version 3985 // in "from" to the corresponding character in "to". This code takes 3986 // quoted strings into account (it doesn't try to translate them), and it signals 3987 // an error if a particular "pattern character" doesn't appear in "from". 3988 // Depending on the values of "from" and "to" this can convert from generic 3989 // to localized patterns or localized to generic. 3990 if (U_FAILURE(status)) { 3991 return; 3992 } 3993 3994 translatedPattern.remove(); 3995 UBool inQuote = false; 3996 for (int32_t i = 0; i < originalPattern.length(); ++i) { 3997 char16_t c = originalPattern[i]; 3998 if (inQuote) { 3999 if (c == QUOTE) { 4000 inQuote = false; 4001 } 4002 } else { 4003 if (c == QUOTE) { 4004 inQuote = true; 4005 } else if (isSyntaxChar(c)) { 4006 int32_t ci = from.indexOf(c); 4007 if (ci == -1) { 4008 status = U_INVALID_FORMAT_ERROR; 4009 return; 4010 } 4011 c = to[ci]; 4012 } 4013 } 4014 translatedPattern += c; 4015 } 4016 if (inQuote) { 4017 status = U_INVALID_FORMAT_ERROR; 4018 return; 4019 } 4020 } 4021 4022 //---------------------------------------------------------------------- 4023 4024 UnicodeString& 4025 SimpleDateFormat::toPattern(UnicodeString& result) const 4026 { 4027 result = fPattern; 4028 return result; 4029 } 4030 4031 //---------------------------------------------------------------------- 4032 4033 UnicodeString& 4034 SimpleDateFormat::toLocalizedPattern(UnicodeString& result, 4035 UErrorCode& status) const 4036 { 4037 translatePattern(fPattern, result, 4038 UnicodeString(DateFormatSymbols::getPatternUChars()), 4039 fSymbols->fLocalPatternChars, status); 4040 return result; 4041 } 4042 4043 //---------------------------------------------------------------------- 4044 4045 void 4046 SimpleDateFormat::applyPattern(const UnicodeString& pattern) 4047 { 4048 fPattern = pattern; 4049 parsePattern(); 4050 4051 // Hack to update use of Gannen year numbering for ja@calendar=japanese - 4052 // use only if format is non-numeric (includes 年) and no other fDateOverride. 4053 if (fCalendar != nullptr && typeid(*fCalendar) == typeid(JapaneseCalendar) && 4054 uprv_strcmp(fLocale.getLanguage(),"ja") == 0) { 4055 if (fDateOverride==UnicodeString(u"y=jpanyear") && !fHasHanYearChar) { 4056 // Gannen numbering is set but new pattern should not use it, unset; 4057 // use procedure from adoptNumberFormat to clear overrides 4058 if (fSharedNumberFormatters) { 4059 freeSharedNumberFormatters(fSharedNumberFormatters); 4060 fSharedNumberFormatters = nullptr; 4061 } 4062 fDateOverride.setToBogus(); // record status 4063 } else if (fDateOverride.isBogus() && fHasHanYearChar) { 4064 // No current override (=> no Gannen numbering) but new pattern needs it; 4065 // use procedures from initNUmberFormatters / adoptNumberFormat 4066 umtx_lock(&LOCK); 4067 if (fSharedNumberFormatters == nullptr) { 4068 fSharedNumberFormatters = allocSharedNumberFormatters(); 4069 } 4070 umtx_unlock(&LOCK); 4071 if (fSharedNumberFormatters != nullptr) { 4072 Locale ovrLoc(fLocale.getLanguage(),fLocale.getCountry(),fLocale.getVariant(),"numbers=jpanyear"); 4073 UErrorCode status = U_ZERO_ERROR; 4074 const SharedNumberFormat *snf = createSharedNumberFormat(ovrLoc, status); 4075 if (U_SUCCESS(status)) { 4076 // Now that we have an appropriate number formatter, fill in the 4077 // appropriate slot in the number formatters table. 4078 UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(u'y'); 4079 SharedObject::copyPtr(snf, fSharedNumberFormatters[patternCharIndex]); 4080 snf->deleteIfZeroRefCount(); 4081 fDateOverride.setTo(u"y=jpanyear", -1); // record status 4082 } 4083 } 4084 } 4085 } 4086 } 4087 4088 //---------------------------------------------------------------------- 4089 4090 void 4091 SimpleDateFormat::applyLocalizedPattern(const UnicodeString& pattern, 4092 UErrorCode &status) 4093 { 4094 translatePattern(pattern, fPattern, 4095 fSymbols->fLocalPatternChars, 4096 UnicodeString(DateFormatSymbols::getPatternUChars()), status); 4097 } 4098 4099 //---------------------------------------------------------------------- 4100 4101 const DateFormatSymbols* 4102 SimpleDateFormat::getDateFormatSymbols() const 4103 { 4104 return fSymbols; 4105 } 4106 4107 //---------------------------------------------------------------------- 4108 4109 void 4110 SimpleDateFormat::adoptDateFormatSymbols(DateFormatSymbols* newFormatSymbols) 4111 { 4112 delete fSymbols; 4113 fSymbols = newFormatSymbols; 4114 } 4115 4116 //---------------------------------------------------------------------- 4117 void 4118 SimpleDateFormat::setDateFormatSymbols(const DateFormatSymbols& newFormatSymbols) 4119 { 4120 delete fSymbols; 4121 fSymbols = new DateFormatSymbols(newFormatSymbols); 4122 } 4123 4124 //---------------------------------------------------------------------- 4125 const TimeZoneFormat* 4126 SimpleDateFormat::getTimeZoneFormat() const { 4127 // TimeZoneFormat initialization might fail when out of memory. 4128 // If we always initialize TimeZoneFormat instance, we can return 4129 // such status there. For now, this implementation lazily instantiates 4130 // a TimeZoneFormat for performance optimization reasons, but cannot 4131 // propagate such error (probably just out of memory case) to the caller. 4132 UErrorCode status = U_ZERO_ERROR; 4133 return (const TimeZoneFormat*)tzFormat(status); 4134 } 4135 4136 //---------------------------------------------------------------------- 4137 void 4138 SimpleDateFormat::adoptTimeZoneFormat(TimeZoneFormat* timeZoneFormatToAdopt) 4139 { 4140 delete fTimeZoneFormat; 4141 fTimeZoneFormat = timeZoneFormatToAdopt; 4142 } 4143 4144 //---------------------------------------------------------------------- 4145 void 4146 SimpleDateFormat::setTimeZoneFormat(const TimeZoneFormat& newTimeZoneFormat) 4147 { 4148 delete fTimeZoneFormat; 4149 fTimeZoneFormat = new TimeZoneFormat(newTimeZoneFormat); 4150 } 4151 4152 //---------------------------------------------------------------------- 4153 4154 4155 void SimpleDateFormat::adoptCalendar(Calendar* calendarToAdopt) 4156 { 4157 UErrorCode status = U_ZERO_ERROR; 4158 Locale calLocale(fLocale); 4159 calLocale.setKeywordValue("calendar", calendarToAdopt->getType(), status); 4160 DateFormatSymbols *newSymbols = 4161 DateFormatSymbols::createForLocale(calLocale, status); 4162 if (U_FAILURE(status)) { 4163 delete calendarToAdopt; 4164 return; 4165 } 4166 DateFormat::adoptCalendar(calendarToAdopt); 4167 delete fSymbols; 4168 fSymbols = newSymbols; 4169 initializeDefaultCentury(); // we need a new century (possibly) 4170 } 4171 4172 4173 //---------------------------------------------------------------------- 4174 4175 4176 // override the DateFormat implementation in order to 4177 // lazily initialize fCapitalizationBrkIter 4178 void 4179 SimpleDateFormat::setContext(UDisplayContext value, UErrorCode& status) 4180 { 4181 DateFormat::setContext(value, status); 4182 #if !UCONFIG_NO_BREAK_ITERATION 4183 if (U_SUCCESS(status)) { 4184 if ( fCapitalizationBrkIter == nullptr && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || 4185 value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE) ) { 4186 status = U_ZERO_ERROR; 4187 fCapitalizationBrkIter = BreakIterator::createSentenceInstance(fLocale, status); 4188 if (U_FAILURE(status)) { 4189 delete fCapitalizationBrkIter; 4190 fCapitalizationBrkIter = nullptr; 4191 } 4192 } 4193 } 4194 #endif 4195 } 4196 4197 4198 //---------------------------------------------------------------------- 4199 4200 4201 UBool 4202 SimpleDateFormat::isFieldUnitIgnored(UCalendarDateFields field) const { 4203 return isFieldUnitIgnored(fPattern, field); 4204 } 4205 4206 4207 UBool 4208 SimpleDateFormat::isFieldUnitIgnored(const UnicodeString& pattern, 4209 UCalendarDateFields field) { 4210 int32_t fieldLevel = fgCalendarFieldToLevel[field]; 4211 int32_t level; 4212 char16_t ch; 4213 UBool inQuote = false; 4214 char16_t prevCh = 0; 4215 int32_t count = 0; 4216 4217 for (int32_t i = 0; i < pattern.length(); ++i) { 4218 ch = pattern[i]; 4219 if (ch != prevCh && count > 0) { 4220 level = getLevelFromChar(prevCh); 4221 // the larger the level, the smaller the field unit. 4222 if (fieldLevel <= level) { 4223 return false; 4224 } 4225 count = 0; 4226 } 4227 if (ch == QUOTE) { 4228 if ((i+1) < pattern.length() && pattern[i+1] == QUOTE) { 4229 ++i; 4230 } else { 4231 inQuote = ! inQuote; 4232 } 4233 } 4234 else if (!inQuote && isSyntaxChar(ch)) { 4235 prevCh = ch; 4236 ++count; 4237 } 4238 } 4239 if (count > 0) { 4240 // last item 4241 level = getLevelFromChar(prevCh); 4242 if (fieldLevel <= level) { 4243 return false; 4244 } 4245 } 4246 return true; 4247 } 4248 4249 //---------------------------------------------------------------------- 4250 4251 const Locale& 4252 SimpleDateFormat::getSmpFmtLocale() const { 4253 return fLocale; 4254 } 4255 4256 //---------------------------------------------------------------------- 4257 4258 int32_t 4259 SimpleDateFormat::checkIntSuffix(const UnicodeString& text, int32_t start, 4260 int32_t patLoc, UBool isNegative) const { 4261 // local variables 4262 UnicodeString suf; 4263 int32_t patternMatch; 4264 int32_t textPreMatch; 4265 int32_t textPostMatch; 4266 4267 // check that we are still in range 4268 if ( (start > text.length()) || 4269 (start < 0) || 4270 (patLoc < 0) || 4271 (patLoc > fPattern.length())) { 4272 // out of range, don't advance location in text 4273 return start; 4274 } 4275 4276 // get the suffix 4277 DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(fNumberFormat); 4278 if (decfmt != nullptr) { 4279 if (isNegative) { 4280 suf = decfmt->getNegativeSuffix(suf); 4281 } 4282 else { 4283 suf = decfmt->getPositiveSuffix(suf); 4284 } 4285 } 4286 4287 // check for suffix 4288 if (suf.length() <= 0) { 4289 return start; 4290 } 4291 4292 // check suffix will be encountered in the pattern 4293 patternMatch = compareSimpleAffix(suf,fPattern,patLoc); 4294 4295 // check if a suffix will be encountered in the text 4296 textPreMatch = compareSimpleAffix(suf,text,start); 4297 4298 // check if a suffix was encountered in the text 4299 textPostMatch = compareSimpleAffix(suf,text,start-suf.length()); 4300 4301 // check for suffix match 4302 if ((textPreMatch >= 0) && (patternMatch >= 0) && (textPreMatch == patternMatch)) { 4303 return start; 4304 } 4305 else if ((textPostMatch >= 0) && (patternMatch >= 0) && (textPostMatch == patternMatch)) { 4306 return start - suf.length(); 4307 } 4308 4309 // should not get here 4310 return start; 4311 } 4312 4313 //---------------------------------------------------------------------- 4314 4315 int32_t 4316 SimpleDateFormat::compareSimpleAffix(const UnicodeString& affix, 4317 const UnicodeString& input, 4318 int32_t pos) const { 4319 int32_t start = pos; 4320 for (int32_t i=0; i<affix.length(); ) { 4321 UChar32 c = affix.char32At(i); 4322 int32_t len = U16_LENGTH(c); 4323 if (PatternProps::isWhiteSpace(c)) { 4324 // We may have a pattern like: \u200F \u0020 4325 // and input text like: \u200F \u0020 4326 // Note that U+200F and U+0020 are Pattern_White_Space but only 4327 // U+0020 is UWhiteSpace. So we have to first do a direct 4328 // match of the run of Pattern_White_Space in the pattern, 4329 // then match any extra characters. 4330 UBool literalMatch = false; 4331 while (pos < input.length() && 4332 input.char32At(pos) == c) { 4333 literalMatch = true; 4334 i += len; 4335 pos += len; 4336 if (i == affix.length()) { 4337 break; 4338 } 4339 c = affix.char32At(i); 4340 len = U16_LENGTH(c); 4341 if (!PatternProps::isWhiteSpace(c)) { 4342 break; 4343 } 4344 } 4345 4346 // Advance over run in pattern 4347 i = skipPatternWhiteSpace(affix, i); 4348 4349 // Advance over run in input text 4350 // Must see at least one white space char in input, 4351 // unless we've already matched some characters literally. 4352 int32_t s = pos; 4353 pos = skipUWhiteSpace(input, pos); 4354 if (pos == s && !literalMatch) { 4355 return -1; 4356 } 4357 4358 // If we skip UWhiteSpace in the input text, we need to skip it in the pattern. 4359 // Otherwise, the previous lines may have skipped over text (such as U+00A0) that 4360 // is also in the affix. 4361 i = skipUWhiteSpace(affix, i); 4362 } else { 4363 if (pos < input.length() && 4364 input.char32At(pos) == c) { 4365 i += len; 4366 pos += len; 4367 } else { 4368 return -1; 4369 } 4370 } 4371 } 4372 return pos - start; 4373 } 4374 4375 //---------------------------------------------------------------------- 4376 4377 int32_t 4378 SimpleDateFormat::skipPatternWhiteSpace(const UnicodeString& text, int32_t pos) const { 4379 const char16_t* s = text.getBuffer(); 4380 return static_cast<int32_t>(PatternProps::skipWhiteSpace(s + pos, text.length() - pos) - s); 4381 } 4382 4383 //---------------------------------------------------------------------- 4384 4385 int32_t 4386 SimpleDateFormat::skipUWhiteSpace(const UnicodeString& text, int32_t pos) const { 4387 while (pos < text.length()) { 4388 UChar32 c = text.char32At(pos); 4389 if (!u_isUWhiteSpace(c)) { 4390 break; 4391 } 4392 pos += U16_LENGTH(c); 4393 } 4394 return pos; 4395 } 4396 4397 //---------------------------------------------------------------------- 4398 4399 // Lazy TimeZoneFormat instantiation, semantically const. 4400 TimeZoneFormat * 4401 SimpleDateFormat::tzFormat(UErrorCode &status) const { 4402 Mutex m(&LOCK); 4403 if (fTimeZoneFormat == nullptr && U_SUCCESS(status)) { 4404 const_cast<SimpleDateFormat *>(this)->fTimeZoneFormat = 4405 TimeZoneFormat::createInstance(fLocale, status); 4406 } 4407 return fTimeZoneFormat; 4408 } 4409 4410 void SimpleDateFormat::parsePattern() { 4411 fHasMinute = false; 4412 fHasSecond = false; 4413 fHasHanYearChar = false; 4414 4415 int len = fPattern.length(); 4416 UBool inQuote = false; 4417 for (int32_t i = 0; i < len; ++i) { 4418 char16_t ch = fPattern[i]; 4419 if (ch == QUOTE) { 4420 inQuote = !inQuote; 4421 } 4422 if (ch == 0x5E74) { // don't care whether this is inside quotes 4423 fHasHanYearChar = true; 4424 } 4425 if (!inQuote) { 4426 if (ch == 0x6D) { // 0x6D == 'm' 4427 fHasMinute = true; 4428 } 4429 if (ch == 0x73) { // 0x73 == 's' 4430 fHasSecond = true; 4431 } 4432 } 4433 } 4434 } 4435 4436 U_NAMESPACE_END 4437 4438 #endif /* #if !UCONFIG_NO_FORMATTING */ 4439 4440 //eof