plurfmt.h (26073B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2007-2014, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 * 9 10 * File PLURFMT.H 11 ******************************************************************************** 12 */ 13 14 #ifndef PLURFMT 15 #define PLURFMT 16 17 #include "unicode/utypes.h" 18 19 #if U_SHOW_CPLUSPLUS_API 20 21 /** 22 * \file 23 * \brief C++ API: PluralFormat object 24 */ 25 26 #if !UCONFIG_NO_FORMATTING 27 28 #include "unicode/messagepattern.h" 29 #include "unicode/numfmt.h" 30 #include "unicode/plurrule.h" 31 32 U_NAMESPACE_BEGIN 33 34 class Hashtable; 35 class NFRule; 36 37 /** 38 * <p> 39 * <code>PluralFormat</code> supports the creation of internationalized 40 * messages with plural inflection. It is based on <i>plural 41 * selection</i>, i.e. the caller specifies messages for each 42 * plural case that can appear in the user's language and the 43 * <code>PluralFormat</code> selects the appropriate message based on 44 * the number. 45 * </p> 46 * <h4>The Problem of Plural Forms in Internationalized Messages</h4> 47 * <p> 48 * Different languages have different ways to inflect 49 * plurals. Creating internationalized messages that include plural 50 * forms is only feasible when the framework is able to handle plural 51 * forms of <i>all</i> languages correctly. <code>ChoiceFormat</code> 52 * doesn't handle this well, because it attaches a number interval to 53 * each message and selects the message whose interval contains a 54 * given number. This can only handle a finite number of 55 * intervals. But in some languages, like Polish, one plural case 56 * applies to infinitely many intervals (e.g., the plural case applies to 57 * numbers ending with 2, 3, or 4 except those ending with 12, 13, or 58 * 14). Thus <code>ChoiceFormat</code> is not adequate. 59 * </p><p> 60 * <code>PluralFormat</code> deals with this by breaking the problem 61 * into two parts: 62 * <ul> 63 * <li>It uses <code>PluralRules</code> that can define more complex 64 * conditions for a plural case than just a single interval. These plural 65 * rules define both what plural cases exist in a language, and to 66 * which numbers these cases apply. 67 * <li>It provides predefined plural rules for many languages. Thus, the programmer 68 * need not worry about the plural cases of a language and 69 * does not have to define the plural cases; they can simply 70 * use the predefined keywords. The whole plural formatting of messages can 71 * be done using localized patterns from resource bundles. For predefined plural 72 * rules, see the CLDR <i>Language Plural Rules</i> page at 73 * https://unicode-org.github.io/cldr-staging/charts/latest/supplemental/language_plural_rules.html 74 * </ul> 75 * </p> 76 * <h4>Usage of <code>PluralFormat</code></h4> 77 * <p>Note: Typically, plural formatting is done via <code>MessageFormat</code> 78 * with a <code>plural</code> argument type, 79 * rather than using a stand-alone <code>PluralFormat</code>. 80 * </p><p> 81 * This discussion assumes that you use <code>PluralFormat</code> with 82 * a predefined set of plural rules. You can create one using one of 83 * the constructors that takes a <code>locale</code> object. To 84 * specify the message pattern, you can either pass it to the 85 * constructor or set it explicitly using the 86 * <code>applyPattern()</code> method. The <code>format()</code> 87 * method takes a number object and selects the message of the 88 * matching plural case. This message will be returned. 89 * </p> 90 * <h5>Patterns and Their Interpretation</h5> 91 * <p> 92 * The pattern text defines the message output for each plural case of the 93 * specified locale. Syntax: 94 * <pre> 95 * pluralStyle = [offsetValue] (selector '{' message '}')+ 96 * offsetValue = "offset:" number 97 * selector = explicitValue | keyword 98 * explicitValue = '=' number // adjacent, no white space in between 99 * keyword = [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+ 100 * message: see {@link MessageFormat} 101 * </pre> 102 * Pattern_White_Space between syntax elements is ignored, except 103 * between the {curly braces} and their sub-message, 104 * and between the '=' and the number of an explicitValue. 105 * 106 * </p><p> 107 * There are 6 predefined casekeyword in CLDR/ICU - 'zero', 'one', 'two', 'few', 'many' and 108 * 'other'. You always have to define a message text for the default plural case 109 * <code>other</code> which is contained in every rule set. 110 * If you do not specify a message text for a particular plural case, the 111 * message text of the plural case <code>other</code> gets assigned to this 112 * plural case. 113 * </p><p> 114 * When formatting, the input number is first matched against the explicitValue clauses. 115 * If there is no exact-number match, then a keyword is selected by calling 116 * the <code>PluralRules</code> with the input number <em>minus the offset</em>. 117 * (The offset defaults to 0 if it is omitted from the pattern string.) 118 * If there is no clause with that keyword, then the "other" clauses is returned. 119 * </p><p> 120 * An unquoted pound sign (<code>#</code>) in the selected sub-message 121 * itself (i.e., outside of arguments nested in the sub-message) 122 * is replaced by the input number minus the offset. 123 * The number-minus-offset value is formatted using a 124 * <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you 125 * need special number formatting, you have to use a <code>MessageFormat</code> 126 * and explicitly specify a <code>NumberFormat</code> argument. 127 * <strong>Note:</strong> That argument is formatting without subtracting the offset! 128 * If you need a custom format and have a non-zero offset, then you need to pass the 129 * number-minus-offset value as a separate parameter. 130 * </p> 131 * For a usage example, see the {@link MessageFormat} class documentation. 132 * 133 * <h4>Defining Custom Plural Rules</h4> 134 * <p>If you need to use <code>PluralFormat</code> with custom rules, you can 135 * create a <code>PluralRules</code> object and pass it to 136 * <code>PluralFormat</code>'s constructor. If you also specify a locale in this 137 * constructor, this locale will be used to format the number in the message 138 * texts. 139 * </p><p> 140 * For more information about <code>PluralRules</code>, see 141 * {@link PluralRules}. 142 * </p> 143 * 144 * ported from Java 145 * @stable ICU 4.0 146 */ 147 148 class U_I18N_API_CLASS PluralFormat : public Format { 149 public: 150 151 /** 152 * Creates a new cardinal-number <code>PluralFormat</code> for the default locale. 153 * This locale will be used to get the set of plural rules and for standard 154 * number formatting. 155 * @param status output param set to success/failure code on exit, which 156 * must not indicate a failure before the function call. 157 * @stable ICU 4.0 158 */ 159 U_I18N_API PluralFormat(UErrorCode& status); 160 161 /** 162 * Creates a new cardinal-number <code>PluralFormat</code> for a given locale. 163 * @param locale the <code>PluralFormat</code> will be configured with 164 * rules for this locale. This locale will also be used for 165 * standard number formatting. 166 * @param status output param set to success/failure code on exit, which 167 * must not indicate a failure before the function call. 168 * @stable ICU 4.0 169 */ 170 U_I18N_API PluralFormat(const Locale& locale, UErrorCode& status); 171 172 /** 173 * Creates a new <code>PluralFormat</code> for a given set of rules. 174 * The standard number formatting will be done using the default locale. 175 * @param rules defines the behavior of the <code>PluralFormat</code> 176 * object. 177 * @param status output param set to success/failure code on exit, which 178 * must not indicate a failure before the function call. 179 * @stable ICU 4.0 180 */ 181 U_I18N_API PluralFormat(const PluralRules& rules, UErrorCode& status); 182 183 /** 184 * Creates a new <code>PluralFormat</code> for a given set of rules. 185 * The standard number formatting will be done using the given locale. 186 * @param locale the default number formatting will be done using this 187 * locale. 188 * @param rules defines the behavior of the <code>PluralFormat</code> 189 * object. 190 * @param status output param set to success/failure code on exit, which 191 * must not indicate a failure before the function call. 192 * @stable ICU 4.0 193 */ 194 U_I18N_API PluralFormat(const Locale& locale, const PluralRules& rules, UErrorCode& status); 195 196 /** 197 * Creates a new <code>PluralFormat</code> for the plural type. 198 * The standard number formatting will be done using the given locale. 199 * @param locale the default number formatting will be done using this 200 * locale. 201 * @param type The plural type (e.g., cardinal or ordinal). 202 * @param status output param set to success/failure code on exit, which 203 * must not indicate a failure before the function call. 204 * @stable ICU 50 205 */ 206 U_I18N_API PluralFormat(const Locale& locale, UPluralType type, UErrorCode& status); 207 208 /** 209 * Creates a new cardinal-number <code>PluralFormat</code> for a given pattern string. 210 * The default locale will be used to get the set of plural rules and for 211 * standard number formatting. 212 * @param pattern the pattern for this <code>PluralFormat</code>. 213 * errors are returned to status if the pattern is invalid. 214 * @param status output param set to success/failure code on exit, which 215 * must not indicate a failure before the function call. 216 * @stable ICU 4.0 217 */ 218 U_I18N_API PluralFormat(const UnicodeString& pattern, UErrorCode& status); 219 220 /** 221 * Creates a new cardinal-number <code>PluralFormat</code> for a given pattern string and 222 * locale. 223 * The locale will be used to get the set of plural rules and for 224 * standard number formatting. 225 * @param locale the <code>PluralFormat</code> will be configured with 226 * rules for this locale. This locale will also be used for 227 * standard number formatting. 228 * @param pattern the pattern for this <code>PluralFormat</code>. 229 * errors are returned to status if the pattern is invalid. 230 * @param status output param set to success/failure code on exit, which 231 * must not indicate a failure before the function call. 232 * @stable ICU 4.0 233 */ 234 U_I18N_API PluralFormat(const Locale& locale, const UnicodeString& pattern, UErrorCode& status); 235 236 /** 237 * Creates a new <code>PluralFormat</code> for a given set of rules, a 238 * pattern and a locale. 239 * @param rules defines the behavior of the <code>PluralFormat</code> 240 * object. 241 * @param pattern the pattern for this <code>PluralFormat</code>. 242 * errors are returned to status if the pattern is invalid. 243 * @param status output param set to success/failure code on exit, which 244 * must not indicate a failure before the function call. 245 * @stable ICU 4.0 246 */ 247 U_I18N_API PluralFormat(const PluralRules& rules, const UnicodeString& pattern, UErrorCode& status); 248 249 /** 250 * Creates a new <code>PluralFormat</code> for a given set of rules, a 251 * pattern and a locale. 252 * @param locale the <code>PluralFormat</code> will be configured with 253 * rules for this locale. This locale will also be used for 254 * standard number formatting. 255 * @param rules defines the behavior of the <code>PluralFormat</code> 256 * object. 257 * @param pattern the pattern for this <code>PluralFormat</code>. 258 * errors are returned to status if the pattern is invalid. 259 * @param status output param set to success/failure code on exit, which 260 * must not indicate a failure before the function call. 261 * @stable ICU 4.0 262 */ 263 U_I18N_API PluralFormat(const Locale& locale, 264 const PluralRules& rules, 265 const UnicodeString& pattern, 266 UErrorCode& status); 267 268 /** 269 * Creates a new <code>PluralFormat</code> for a plural type, a 270 * pattern and a locale. 271 * @param locale the <code>PluralFormat</code> will be configured with 272 * rules for this locale. This locale will also be used for 273 * standard number formatting. 274 * @param type The plural type (e.g., cardinal or ordinal). 275 * @param pattern the pattern for this <code>PluralFormat</code>. 276 * errors are returned to status if the pattern is invalid. 277 * @param status output param set to success/failure code on exit, which 278 * must not indicate a failure before the function call. 279 * @stable ICU 50 280 */ 281 U_I18N_API PluralFormat(const Locale& locale, 282 UPluralType type, 283 const UnicodeString& pattern, 284 UErrorCode& status); 285 286 /** 287 * copy constructor. 288 * @stable ICU 4.0 289 */ 290 U_I18N_API PluralFormat(const PluralFormat& other); 291 292 /** 293 * Destructor. 294 * @stable ICU 4.0 295 */ 296 U_I18N_API virtual ~PluralFormat(); 297 298 /** 299 * Sets the pattern used by this plural format. 300 * The method parses the pattern and creates a map of format strings 301 * for the plural rules. 302 * Patterns and their interpretation are specified in the class description. 303 * 304 * @param pattern the pattern for this plural format 305 * errors are returned to status if the pattern is invalid. 306 * @param status output param set to success/failure code on exit, which 307 * must not indicate a failure before the function call. 308 * @stable ICU 4.0 309 */ 310 U_I18N_API void applyPattern(const UnicodeString& pattern, UErrorCode& status); 311 312 using Format::format; 313 314 /** 315 * Formats a plural message for a given number. 316 * 317 * @param number a number for which the plural message should be formatted 318 * for. If no pattern has been applied to this 319 * <code>PluralFormat</code> object yet, the formatted number 320 * will be returned. 321 * @param status output param set to success/failure code on exit, which 322 * must not indicate a failure before the function call. 323 * @return the string containing the formatted plural message. 324 * @stable ICU 4.0 325 */ 326 U_I18N_API UnicodeString format(int32_t number, UErrorCode& status) const; 327 328 /** 329 * Formats a plural message for a given number. 330 * 331 * @param number a number for which the plural message should be formatted 332 * for. If no pattern has been applied to this 333 * PluralFormat object yet, the formatted number 334 * will be returned. 335 * @param status output param set to success or failure code on exit, which 336 * must not indicate a failure before the function call. 337 * @return the string containing the formatted plural message. 338 * @stable ICU 4.0 339 */ 340 U_I18N_API UnicodeString format(double number, UErrorCode& status) const; 341 342 /** 343 * Formats a plural message for a given number. 344 * 345 * @param number a number for which the plural message should be formatted 346 * for. If no pattern has been applied to this 347 * <code>PluralFormat</code> object yet, the formatted number 348 * will be returned. 349 * @param appendTo output parameter to receive result. 350 * result is appended to existing contents. 351 * @param pos On input: an alignment field, if desired. 352 * On output: the offsets of the alignment field. 353 * @param status output param set to success/failure code on exit, which 354 * must not indicate a failure before the function call. 355 * @return the string containing the formatted plural message. 356 * @stable ICU 4.0 357 */ 358 U_I18N_API UnicodeString& format(int32_t number, 359 UnicodeString& appendTo, 360 FieldPosition& pos, 361 UErrorCode& status) const; 362 363 /** 364 * Formats a plural message for a given number. 365 * 366 * @param number a number for which the plural message should be formatted 367 * for. If no pattern has been applied to this 368 * PluralFormat object yet, the formatted number 369 * will be returned. 370 * @param appendTo output parameter to receive result. 371 * result is appended to existing contents. 372 * @param pos On input: an alignment field, if desired. 373 * On output: the offsets of the alignment field. 374 * @param status output param set to success/failure code on exit, which 375 * must not indicate a failure before the function call. 376 * @return the string containing the formatted plural message. 377 * @stable ICU 4.0 378 */ 379 U_I18N_API UnicodeString& format(double number, 380 UnicodeString& appendTo, 381 FieldPosition& pos, 382 UErrorCode& status) const; 383 384 #ifndef U_HIDE_DEPRECATED_API 385 /** 386 * Sets the locale used by this <code>PluraFormat</code> object. 387 * Note: Calling this method resets this <code>PluraFormat</code> object, 388 * i.e., a pattern that was applied previously will be removed, 389 * and the NumberFormat is set to the default number format for 390 * the locale. The resulting format behaves the same as one 391 * constructed from {@link #PluralFormat(const Locale& locale, UPluralType type, UErrorCode& status)} 392 * with UPLURAL_TYPE_CARDINAL. 393 * @param locale the <code>locale</code> to use to configure the formatter. 394 * @param status output param set to success/failure code on exit, which 395 * must not indicate a failure before the function call. 396 * @deprecated ICU 50 This method clears the pattern and might create 397 * a different kind of PluralRules instance; 398 * use one of the constructors to create a new instance instead. 399 */ 400 U_I18N_API void setLocale(const Locale& locale, UErrorCode& status); 401 #endif /* U_HIDE_DEPRECATED_API */ 402 403 /** 404 * Sets the number format used by this formatter. You only need to 405 * call this if you want a different number format than the default 406 * formatter for the locale. 407 * @param format the number format to use. 408 * @param status output param set to success/failure code on exit, which 409 * must not indicate a failure before the function call. 410 * @stable ICU 4.0 411 */ 412 U_I18N_API void setNumberFormat(const NumberFormat* format, UErrorCode& status); 413 414 /** 415 * Assignment operator 416 * 417 * @param other the PluralFormat object to copy from. 418 * @stable ICU 4.0 419 */ 420 U_I18N_API PluralFormat& operator=(const PluralFormat& other); 421 422 /** 423 * Return true if another object is semantically equal to this one. 424 * 425 * @param other the PluralFormat object to be compared with. 426 * @return true if other is semantically equal to this. 427 * @stable ICU 4.0 428 */ 429 U_I18N_API virtual bool operator==(const Format& other) const override; 430 431 /** 432 * Return true if another object is semantically unequal to this one. 433 * 434 * @param other the PluralFormat object to be compared with. 435 * @return true if other is semantically unequal to this. 436 * @stable ICU 4.0 437 */ 438 U_I18N_API virtual bool operator!=(const Format& other) const; 439 440 /** 441 * Clones this Format object polymorphically. The caller owns the 442 * result and should delete it when done. 443 * @stable ICU 4.0 444 */ 445 U_I18N_API virtual PluralFormat* clone() const override; 446 447 /** 448 * Formats a plural message for a number taken from a Formattable object. 449 * 450 * @param obj The object containing a number for which the 451 * plural message should be formatted. 452 * The object must be of a numeric type. 453 * @param appendTo output parameter to receive result. 454 * Result is appended to existing contents. 455 * @param pos On input: an alignment field, if desired. 456 * On output: the offsets of the alignment field. 457 * @param status output param filled with success/failure status. 458 * @return Reference to 'appendTo' parameter. 459 * @stable ICU 4.0 460 */ 461 U_I18N_API UnicodeString& format(const Formattable& obj, 462 UnicodeString& appendTo, 463 FieldPosition& pos, 464 UErrorCode& status) const override; 465 466 /** 467 * Returns the pattern from applyPattern() or constructor(). 468 * 469 * @param appendTo output parameter to receive result. 470 * Result is appended to existing contents. 471 * @return the UnicodeString with inserted pattern. 472 * @stable ICU 4.0 473 */ 474 U_I18N_API UnicodeString& toPattern(UnicodeString& appendTo); 475 476 /** 477 * This method is not yet supported by <code>PluralFormat</code>. 478 * <P> 479 * Before calling, set parse_pos.index to the offset you want to start 480 * parsing at in the source. After calling, parse_pos.index is the end of 481 * the text you parsed. If error occurs, index is unchanged. 482 * <P> 483 * When parsing, leading whitespace is discarded (with a successful parse), 484 * while trailing whitespace is left as is. 485 * <P> 486 * See Format::parseObject() for more. 487 * 488 * @param source The string to be parsed into an object. 489 * @param result Formattable to be set to the parse result. 490 * If parse fails, return contents are undefined. 491 * @param parse_pos The position to start parsing at. Upon return 492 * this param is set to the position after the 493 * last character successfully parsed. If the 494 * source is not parsed successfully, this param 495 * will remain unchanged. 496 * @stable ICU 4.0 497 */ 498 U_I18N_API virtual void parseObject(const UnicodeString& source, 499 Formattable& result, 500 ParsePosition& parse_pos) const override; 501 502 /** 503 * ICU "poor man's RTTI", returns a UClassID for this class. 504 * 505 * @stable ICU 4.0 506 * 507 */ 508 U_I18N_API static UClassID getStaticClassID(); 509 510 /** 511 * ICU "poor man's RTTI", returns a UClassID for the actual class. 512 * 513 * @stable ICU 4.0 514 */ 515 U_I18N_API virtual UClassID getDynamicClassID() const override; 516 517 private: 518 class PluralSelector : public UMemory { 519 public: 520 virtual ~PluralSelector(); 521 /** 522 * Given a number, returns the appropriate PluralFormat keyword. 523 * 524 * @param context worker object for the selector. 525 * @param number The number to be plural-formatted. 526 * @param ec Error code. 527 * @return The selected PluralFormat keyword. 528 */ 529 virtual UnicodeString select(void *context, double number, UErrorCode& ec) const = 0; 530 }; 531 532 class PluralSelectorAdapter : public PluralSelector { 533 public: 534 PluralSelectorAdapter() : pluralRules(nullptr) { 535 } 536 537 virtual ~PluralSelectorAdapter(); 538 539 virtual UnicodeString select(void *context, double number, UErrorCode& /*ec*/) const override; 540 541 void reset(); 542 543 PluralRules* pluralRules; 544 }; 545 546 Locale locale; 547 MessagePattern msgPattern; 548 NumberFormat* numberFormat; 549 double offset; 550 PluralSelectorAdapter pluralRulesWrapper; 551 552 PluralFormat() = delete; // default constructor not implemented 553 void init(const PluralRules* rules, UPluralType type, UErrorCode& status); 554 /** 555 * Copies dynamically allocated values (pointer fields). 556 * Others are copied using their copy constructors and assignment operators. 557 */ 558 void copyObjects(const PluralFormat& other); 559 560 UnicodeString& format(const Formattable& numberObject, double number, 561 UnicodeString& appendTo, 562 FieldPosition& pos, 563 UErrorCode& status) const; 564 565 /** 566 * Finds the PluralFormat sub-message for the given number, or the "other" sub-message. 567 * @param pattern A MessagePattern. 568 * @param partIndex the index of the first PluralFormat argument style part. 569 * @param selector the PluralSelector for mapping the number (minus offset) to a keyword. 570 * @param context worker object for the selector. 571 * @param number a number to be matched to one of the PluralFormat argument's explicit values, 572 * or mapped via the PluralSelector. 573 * @param ec ICU error code. 574 * @return the sub-message start part index. 575 */ 576 static int32_t findSubMessage( 577 const MessagePattern& pattern, int32_t partIndex, 578 const PluralSelector& selector, void *context, double number, UErrorCode& ec); 579 580 void parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, 581 Formattable& result, FieldPosition& pos) const; 582 583 friend class MessageFormat; 584 friend class NFRule; 585 }; 586 587 U_NAMESPACE_END 588 589 #endif /* #if !UCONFIG_NO_FORMATTING */ 590 591 #endif /* U_SHOW_CPLUSPLUS_API */ 592 593 #endif // _PLURFMT 594 //eof