plurfmt.cpp (20659B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2009-2015, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 * 9 * File PLURFMT.CPP 10 ******************************************************************************* 11 */ 12 13 #include "unicode/decimfmt.h" 14 #include "unicode/messagepattern.h" 15 #include "unicode/plurfmt.h" 16 #include "unicode/plurrule.h" 17 #include "unicode/utypes.h" 18 #include "cmemory.h" 19 #include "messageimpl.h" 20 #include "nfrule.h" 21 #include "plurrule_impl.h" 22 #include "uassert.h" 23 #include "uhash.h" 24 #include "number_decimalquantity.h" 25 #include "number_utils.h" 26 #include "number_utypes.h" 27 28 #if !UCONFIG_NO_FORMATTING 29 30 U_NAMESPACE_BEGIN 31 32 using number::impl::DecimalQuantity; 33 34 static const char16_t OTHER_STRING[] = { 35 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other" 36 }; 37 38 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat) 39 40 PluralFormat::PluralFormat(UErrorCode& status) 41 : locale(Locale::getDefault()), 42 msgPattern(status), 43 numberFormat(nullptr), 44 offset(0) { 45 init(nullptr, UPLURAL_TYPE_CARDINAL, status); 46 } 47 48 PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status) 49 : locale(loc), 50 msgPattern(status), 51 numberFormat(nullptr), 52 offset(0) { 53 init(nullptr, UPLURAL_TYPE_CARDINAL, status); 54 } 55 56 PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status) 57 : locale(Locale::getDefault()), 58 msgPattern(status), 59 numberFormat(nullptr), 60 offset(0) { 61 init(&rules, UPLURAL_TYPE_COUNT, status); 62 } 63 64 PluralFormat::PluralFormat(const Locale& loc, 65 const PluralRules& rules, 66 UErrorCode& status) 67 : locale(loc), 68 msgPattern(status), 69 numberFormat(nullptr), 70 offset(0) { 71 init(&rules, UPLURAL_TYPE_COUNT, status); 72 } 73 74 PluralFormat::PluralFormat(const Locale& loc, 75 UPluralType type, 76 UErrorCode& status) 77 : locale(loc), 78 msgPattern(status), 79 numberFormat(nullptr), 80 offset(0) { 81 init(nullptr, type, status); 82 } 83 84 PluralFormat::PluralFormat(const UnicodeString& pat, 85 UErrorCode& status) 86 : locale(Locale::getDefault()), 87 msgPattern(status), 88 numberFormat(nullptr), 89 offset(0) { 90 init(nullptr, UPLURAL_TYPE_CARDINAL, status); 91 applyPattern(pat, status); 92 } 93 94 PluralFormat::PluralFormat(const Locale& loc, 95 const UnicodeString& pat, 96 UErrorCode& status) 97 : locale(loc), 98 msgPattern(status), 99 numberFormat(nullptr), 100 offset(0) { 101 init(nullptr, UPLURAL_TYPE_CARDINAL, status); 102 applyPattern(pat, status); 103 } 104 105 PluralFormat::PluralFormat(const PluralRules& rules, 106 const UnicodeString& pat, 107 UErrorCode& status) 108 : locale(Locale::getDefault()), 109 msgPattern(status), 110 numberFormat(nullptr), 111 offset(0) { 112 init(&rules, UPLURAL_TYPE_COUNT, status); 113 applyPattern(pat, status); 114 } 115 116 PluralFormat::PluralFormat(const Locale& loc, 117 const PluralRules& rules, 118 const UnicodeString& pat, 119 UErrorCode& status) 120 : locale(loc), 121 msgPattern(status), 122 numberFormat(nullptr), 123 offset(0) { 124 init(&rules, UPLURAL_TYPE_COUNT, status); 125 applyPattern(pat, status); 126 } 127 128 PluralFormat::PluralFormat(const Locale& loc, 129 UPluralType type, 130 const UnicodeString& pat, 131 UErrorCode& status) 132 : locale(loc), 133 msgPattern(status), 134 numberFormat(nullptr), 135 offset(0) { 136 init(nullptr, type, status); 137 applyPattern(pat, status); 138 } 139 140 PluralFormat::PluralFormat(const PluralFormat& other) 141 : Format(other), 142 locale(other.locale), 143 msgPattern(other.msgPattern), 144 numberFormat(nullptr), 145 offset(other.offset) { 146 copyObjects(other); 147 } 148 149 void 150 PluralFormat::copyObjects(const PluralFormat& other) { 151 UErrorCode status = U_ZERO_ERROR; 152 delete numberFormat; 153 delete pluralRulesWrapper.pluralRules; 154 if (other.numberFormat == nullptr) { 155 numberFormat = NumberFormat::createInstance(locale, status); 156 } else { 157 numberFormat = other.numberFormat->clone(); 158 } 159 if (other.pluralRulesWrapper.pluralRules == nullptr) { 160 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status); 161 } else { 162 pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone(); 163 } 164 } 165 166 167 PluralFormat::~PluralFormat() { 168 delete numberFormat; 169 } 170 171 void 172 PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) { 173 if (U_FAILURE(status)) { 174 return; 175 } 176 177 if (rules==nullptr) { 178 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status); 179 } else { 180 pluralRulesWrapper.pluralRules = rules->clone(); 181 if (pluralRulesWrapper.pluralRules == nullptr) { 182 status = U_MEMORY_ALLOCATION_ERROR; 183 return; 184 } 185 } 186 187 numberFormat= NumberFormat::createInstance(locale, status); 188 } 189 190 void 191 PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) { 192 msgPattern.parsePluralStyle(newPattern, nullptr, status); 193 if (U_FAILURE(status)) { 194 msgPattern.clear(); 195 offset = 0; 196 return; 197 } 198 offset = msgPattern.getPluralOffset(0); 199 } 200 201 UnicodeString& 202 PluralFormat::format(const Formattable& obj, 203 UnicodeString& appendTo, 204 FieldPosition& pos, 205 UErrorCode& status) const 206 { 207 if (U_FAILURE(status)) return appendTo; 208 209 if (obj.isNumeric()) { 210 return format(obj, obj.getDouble(), appendTo, pos, status); 211 } else { 212 status = U_ILLEGAL_ARGUMENT_ERROR; 213 return appendTo; 214 } 215 } 216 217 UnicodeString 218 PluralFormat::format(int32_t number, UErrorCode& status) const { 219 FieldPosition fpos(FieldPosition::DONT_CARE); 220 UnicodeString result; 221 return format(Formattable(number), number, result, fpos, status); 222 } 223 224 UnicodeString 225 PluralFormat::format(double number, UErrorCode& status) const { 226 FieldPosition fpos(FieldPosition::DONT_CARE); 227 UnicodeString result; 228 return format(Formattable(number), number, result, fpos, status); 229 } 230 231 232 UnicodeString& 233 PluralFormat::format(int32_t number, 234 UnicodeString& appendTo, 235 FieldPosition& pos, 236 UErrorCode& status) const { 237 return format(Formattable(number), static_cast<double>(number), appendTo, pos, status); 238 } 239 240 UnicodeString& 241 PluralFormat::format(double number, 242 UnicodeString& appendTo, 243 FieldPosition& pos, 244 UErrorCode& status) const { 245 return format(Formattable(number), number, appendTo, pos, status); 246 } 247 248 UnicodeString& 249 PluralFormat::format(const Formattable& numberObject, double number, 250 UnicodeString& appendTo, 251 FieldPosition& pos, 252 UErrorCode& status) const { 253 if (U_FAILURE(status)) { 254 return appendTo; 255 } 256 if (msgPattern.countParts() == 0) { 257 return numberFormat->format(numberObject, appendTo, pos, status); 258 } 259 260 // Get the appropriate sub-message. 261 // Select it based on the formatted number-offset. 262 double numberMinusOffset = number - offset; 263 // Call NumberFormatter to get both the DecimalQuantity and the string. 264 // This call site needs to use more internal APIs than the Java equivalent. 265 number::impl::UFormattedNumberData data; 266 if (offset == 0) { 267 // could be BigDecimal etc. 268 numberObject.populateDecimalQuantity(data.quantity, status); 269 } else { 270 data.quantity.setToDouble(numberMinusOffset); 271 } 272 UnicodeString numberString; 273 auto *decFmt = dynamic_cast<DecimalFormat *>(numberFormat); 274 if(decFmt != nullptr) { 275 const number::LocalizedNumberFormatter* lnf = decFmt->toNumberFormatter(status); 276 if (U_FAILURE(status)) { 277 return appendTo; 278 } 279 lnf->formatImpl(&data, status); // mutates &data 280 if (U_FAILURE(status)) { 281 return appendTo; 282 } 283 numberString = data.getStringRef().toUnicodeString(); 284 } else { 285 if (offset == 0) { 286 numberFormat->format(numberObject, numberString, status); 287 } else { 288 numberFormat->format(numberMinusOffset, numberString, status); 289 } 290 } 291 292 int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &data.quantity, number, status); 293 if (U_FAILURE(status)) { return appendTo; } 294 // Replace syntactic # signs in the top level of this sub-message 295 // (not in nested arguments) with the formatted number-offset. 296 const UnicodeString& pattern = msgPattern.getPatternString(); 297 int32_t prevIndex = msgPattern.getPart(partIndex).getLimit(); 298 for (;;) { 299 const MessagePattern::Part& part = msgPattern.getPart(++partIndex); 300 const UMessagePatternPartType type = part.getType(); 301 int32_t index = part.getIndex(); 302 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) { 303 return appendTo.append(pattern, prevIndex, index - prevIndex); 304 } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) || 305 (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) { 306 appendTo.append(pattern, prevIndex, index - prevIndex); 307 if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) { 308 appendTo.append(numberString); 309 } 310 prevIndex = part.getLimit(); 311 } else if (type == UMSGPAT_PART_TYPE_ARG_START) { 312 appendTo.append(pattern, prevIndex, index - prevIndex); 313 prevIndex = index; 314 partIndex = msgPattern.getLimitPartIndex(partIndex); 315 index = msgPattern.getPart(partIndex).getLimit(); 316 MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo); 317 prevIndex = index; 318 } 319 } 320 } 321 322 UnicodeString& 323 PluralFormat::toPattern(UnicodeString& appendTo) { 324 if (0 == msgPattern.countParts()) { 325 appendTo.setToBogus(); 326 } else { 327 appendTo.append(msgPattern.getPatternString()); 328 } 329 return appendTo; 330 } 331 332 void 333 PluralFormat::setLocale(const Locale& loc, UErrorCode& status) { 334 if (U_FAILURE(status)) { 335 return; 336 } 337 locale = loc; 338 msgPattern.clear(); 339 delete numberFormat; 340 offset = 0; 341 numberFormat = nullptr; 342 pluralRulesWrapper.reset(); 343 init(nullptr, UPLURAL_TYPE_CARDINAL, status); 344 } 345 346 void 347 PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) { 348 if (U_FAILURE(status)) { 349 return; 350 } 351 NumberFormat* nf = format->clone(); 352 if (nf != nullptr) { 353 delete numberFormat; 354 numberFormat = nf; 355 } else { 356 status = U_MEMORY_ALLOCATION_ERROR; 357 } 358 } 359 360 PluralFormat* 361 PluralFormat::clone() const 362 { 363 return new PluralFormat(*this); 364 } 365 366 367 PluralFormat& 368 PluralFormat::operator=(const PluralFormat& other) { 369 if (this != &other) { 370 locale = other.locale; 371 msgPattern = other.msgPattern; 372 offset = other.offset; 373 copyObjects(other); 374 } 375 376 return *this; 377 } 378 379 bool 380 PluralFormat::operator==(const Format& other) const { 381 if (this == &other) { 382 return true; 383 } 384 if (!Format::operator==(other)) { 385 return false; 386 } 387 const PluralFormat& o = (const PluralFormat&)other; 388 return 389 locale == o.locale && 390 msgPattern == o.msgPattern && // implies same offset 391 (numberFormat == nullptr) == (o.numberFormat == nullptr) && 392 (numberFormat == nullptr || *numberFormat == *o.numberFormat) && 393 (pluralRulesWrapper.pluralRules == nullptr) == (o.pluralRulesWrapper.pluralRules == nullptr) && 394 (pluralRulesWrapper.pluralRules == nullptr || 395 *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules); 396 } 397 398 bool 399 PluralFormat::operator!=(const Format& other) const { 400 return !operator==(other); 401 } 402 403 void 404 PluralFormat::parseObject(const UnicodeString& /*source*/, 405 Formattable& /*result*/, 406 ParsePosition& pos) const 407 { 408 // Parsing not supported. 409 pos.setErrorIndex(pos.getIndex()); 410 } 411 412 int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex, 413 const PluralSelector& selector, void *context, 414 double number, UErrorCode& ec) { 415 if (U_FAILURE(ec)) { 416 return 0; 417 } 418 int32_t count=pattern.countParts(); 419 double offset; 420 const MessagePattern::Part* part=&pattern.getPart(partIndex); 421 if (MessagePattern::Part::hasNumericValue(part->getType())) { 422 offset=pattern.getNumericValue(*part); 423 ++partIndex; 424 } else { 425 offset=0; 426 } 427 // The keyword is empty until we need to match against a non-explicit, not-"other" value. 428 // Then we get the keyword from the selector. 429 // (In other words, we never call the selector if we match against an explicit value, 430 // or if the only non-explicit keyword is "other".) 431 UnicodeString keyword; 432 UnicodeString other(false, OTHER_STRING, 5); 433 // When we find a match, we set msgStart>0 and also set this boolean to true 434 // to avoid matching the keyword again (duplicates are allowed) 435 // while we continue to look for an explicit-value match. 436 UBool haveKeywordMatch=false; 437 // msgStart is 0 until we find any appropriate sub-message. 438 // We remember the first "other" sub-message if we have not seen any 439 // appropriate sub-message before. 440 // We remember the first matching-keyword sub-message if we have not seen 441 // one of those before. 442 // (The parser allows [does not check for] duplicate keywords. 443 // We just have to make sure to take the first one.) 444 // We avoid matching the keyword twice by also setting haveKeywordMatch=true 445 // at the first keyword match. 446 // We keep going until we find an explicit-value match or reach the end of the plural style. 447 int32_t msgStart=0; 448 // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples 449 // until ARG_LIMIT or end of plural-only pattern. 450 do { 451 part=&pattern.getPart(partIndex++); 452 const UMessagePatternPartType type = part->getType(); 453 if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) { 454 break; 455 } 456 U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR); 457 // part is an ARG_SELECTOR followed by an optional explicit value, and then a message 458 if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) { 459 // explicit value like "=2" 460 part=&pattern.getPart(partIndex++); 461 if(number==pattern.getNumericValue(*part)) { 462 // matches explicit value 463 return partIndex; 464 } 465 } else if(!haveKeywordMatch) { 466 // plural keyword like "few" or "other" 467 // Compare "other" first and call the selector if this is not "other". 468 if(pattern.partSubstringMatches(*part, other)) { 469 if(msgStart==0) { 470 msgStart=partIndex; 471 if(0 == keyword.compare(other)) { 472 // This is the first "other" sub-message, 473 // and the selected keyword is also "other". 474 // Do not match "other" again. 475 haveKeywordMatch=true; 476 } 477 } 478 } else { 479 if(keyword.isEmpty()) { 480 keyword=selector.select(context, number-offset, ec); 481 if(msgStart!=0 && (0 == keyword.compare(other))) { 482 // We have already seen an "other" sub-message. 483 // Do not match "other" again. 484 haveKeywordMatch=true; 485 // Skip keyword matching but do getLimitPartIndex(). 486 } 487 } 488 if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) { 489 // keyword matches 490 msgStart=partIndex; 491 // Do not match this keyword again. 492 haveKeywordMatch=true; 493 } 494 } 495 } 496 partIndex=pattern.getLimitPartIndex(partIndex); 497 } while(++partIndex<count); 498 return msgStart; 499 } 500 501 void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const { 502 // If no pattern was applied, return null. 503 if (msgPattern.countParts() == 0) { 504 pos.setBeginIndex(-1); 505 pos.setEndIndex(-1); 506 return; 507 } 508 int partIndex = 0; 509 int currMatchIndex; 510 int count=msgPattern.countParts(); 511 int startingAt = pos.getBeginIndex(); 512 if (startingAt < 0) { 513 startingAt = 0; 514 } 515 516 // The keyword is null until we need to match against a non-explicit, not-"other" value. 517 // Then we get the keyword from the selector. 518 // (In other words, we never call the selector if we match against an explicit value, 519 // or if the only non-explicit keyword is "other".) 520 UnicodeString keyword; 521 UnicodeString matchedWord; 522 const UnicodeString& pattern = msgPattern.getPatternString(); 523 int matchedIndex = -1; 524 // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples 525 // until the end of the plural-only pattern. 526 while (partIndex < count) { 527 const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++); 528 if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) { 529 // Bad format 530 continue; 531 } 532 533 const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++); 534 if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) { 535 // Bad format 536 continue; 537 } 538 539 const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++); 540 if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) { 541 // Bad format 542 continue; 543 } 544 545 UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit()); 546 if (rbnfLenientScanner != nullptr) { 547 // Check if non-lenient rule finds the text before call lenient parsing 548 int32_t tempIndex = source.indexOf(currArg, startingAt); 549 if (tempIndex >= 0) { 550 currMatchIndex = tempIndex; 551 } else { 552 // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us. 553 int32_t length = -1; 554 currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length); 555 } 556 } 557 else { 558 currMatchIndex = source.indexOf(currArg, startingAt); 559 } 560 if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) { 561 matchedIndex = currMatchIndex; 562 matchedWord = currArg; 563 keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit()); 564 } 565 } 566 if (matchedIndex >= 0) { 567 pos.setBeginIndex(matchedIndex); 568 pos.setEndIndex(matchedIndex + matchedWord.length()); 569 result.setString(keyword); 570 return; 571 } 572 573 // Not found! 574 pos.setBeginIndex(-1); 575 pos.setEndIndex(-1); 576 } 577 578 PluralFormat::PluralSelector::~PluralSelector() {} 579 580 PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() { 581 delete pluralRules; 582 } 583 584 UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number, 585 UErrorCode& /*ec*/) const { 586 (void)number; // unused except in the assertion 587 IFixedDecimal *dec=static_cast<IFixedDecimal *>(context); 588 return pluralRules->select(*dec); 589 } 590 591 void PluralFormat::PluralSelectorAdapter::reset() { 592 delete pluralRules; 593 pluralRules = nullptr; 594 } 595 596 597 U_NAMESPACE_END 598 599 600 #endif /* #if !UCONFIG_NO_FORMATTING */ 601 602 //eof