[ tor-browser ].git.dasho

rbnf.h (58565B)
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 1997-2015, International Business Machines Corporation and others.
      6 * All Rights Reserved.
      7 *******************************************************************************
      8 */
      9 
     10 #ifndef RBNF_H
     11 #define RBNF_H
     12 
     13 #include "unicode/utypes.h"
     14 
     15 #if U_SHOW_CPLUSPLUS_API
     16 
     17 /**
     18 * \file
     19 * \brief C++ API: Rule Based Number Format
     20 */
     21 
     22 /**
     23 * \def U_HAVE_RBNF
     24 * This will be 0 if RBNF support is not included in ICU
     25 * and 1 if it is.
     26 *
     27 * @stable ICU 2.4
     28 */
     29 #if UCONFIG_NO_FORMATTING
     30 #define U_HAVE_RBNF 0
     31 #else
     32 #define U_HAVE_RBNF 1
     33 
     34 #include "unicode/dcfmtsym.h"
     35 #include "unicode/fmtable.h"
     36 #include "unicode/locid.h"
     37 #include "unicode/numfmt.h"
     38 #include "unicode/unistr.h"
     39 #include "unicode/strenum.h"
     40 #include "unicode/brkiter.h"
     41 #include "unicode/upluralrules.h"
     42 
     43 U_NAMESPACE_BEGIN
     44 
     45 class NFRule;
     46 class NFRuleSet;
     47 class LocalizationInfo;
     48 class PluralFormat;
     49 class RuleBasedCollator;
     50 
     51 /**
     52 * Tags for the predefined rulesets.
     53 *
     54 * @stable ICU 2.2
     55 */
     56 enum URBNFRuleSetTag {
     57    /**
     58     * Requests predefined ruleset for spelling out numeric values in words.
     59     * @stable ICU 2.2
     60     */
     61    URBNF_SPELLOUT,
     62    /**
     63     * Requests predefined ruleset for the ordinal form of a number.
     64     * @stable ICU 2.2
     65     */
     66    URBNF_ORDINAL,
     67 #ifndef U_HIDE_DEPRECATED_API
     68    /**
     69     * Requests predefined ruleset for formatting a value as a duration in hours, minutes, and seconds.
     70     * @deprecated ICU 74 Use MeasureFormat instead.
     71     */
     72    URBNF_DURATION,
     73 #endif // U_HIDE_DEPRECATED_API
     74    /**
     75     * Requests predefined ruleset for various non-place-value numbering systems.
     76     * WARNING: The same resource contains rule sets for a variety of different numbering systems.
     77     * You need to call setDefaultRuleSet() on the formatter to choose the actual numbering system.
     78     * @stable ICU 2.2
     79     */
     80    URBNF_NUMBERING_SYSTEM = 3, // explicit value: stays 3 even when URBNF_DURATION is compiled out by U_HIDE_DEPRECATED_API
     81 #ifndef U_HIDE_DEPRECATED_API
     82    /**
     83     * One more than the highest normal URBNFRuleSetTag value.
     84     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     85     */
     86    URBNF_COUNT
     87 #endif  // U_HIDE_DEPRECATED_API
     88 };
     89 
     90 /**
     91 * The RuleBasedNumberFormat class formats numbers according to a set of rules.
     92 *
     93 * <p>This number formatter is typically used for spelling out numeric values in words (e.g., 25,376
     94 * as &quot;twenty-five thousand three hundred seventy-six&quot; or &quot;vingt-cinq mille trois
     95 * cent soixante-seize&quot; or
     96 * &quot;f&uuml;nfundzwanzigtausenddreihundertsechsundsiebzig&quot;), but can also be used for
     97 * other complicated formatting tasks. For example, formatting a number as Roman numerals (e.g. 8 as VIII)
     98 * or as ordinal digits (e.g. 1st, 2nd, 3rd, 4th).</p>
     99 *
    100 * <p>The resources contain three predefined formatters for each locale: spellout, which
    101 * spells out a value in words (123 is &quot;one hundred twenty-three&quot;); ordinal, which
    102 * appends an ordinal suffix to the end of a numeral (123 is &quot;123rd&quot;); and
    103 * numbering system, which shows a number in other non-decimal based systems (e.g. Roman numerals).
    104 * The client can also define more specialized <code>RuleBasedNumberFormat</code>s
    105 * by supplying programmer-defined rule sets.</p>
    106 *
    107 * <p>The behavior of a <code>RuleBasedNumberFormat</code> is specified by a textual description
    108 * that is either passed to the constructor as a <code>String</code> or loaded from a resource
    109 * bundle. In its simplest form, the description consists of a semicolon-delimited list of <em>rules.</em>
    110 * Each rule has a string of output text and a value or range of values it is applicable to.
    111 * In a typical spellout rule set, the first twenty rules are the words for the numbers from
    112 * 0 to 19:</p>
    113 *
    114 * <pre>zero; one; two; three; four; five; six; seven; eight; nine;
    115 * ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen; seventeen; eighteen; nineteen;</pre>
    116 *
    117 * <p>For larger numbers, we can use the preceding set of rules to format the ones place, and
    118 * we only have to supply the words for the multiples of 10:</p>
    119 *
    120 * <pre>
    121 * 20: twenty[-&gt;&gt;];
    122 * 30: thirty[-&gt;&gt;];
    123 * 40: forty[-&gt;&gt;];
    124 * 50: fifty[-&gt;&gt;];
    125 * 60: sixty[-&gt;&gt;];
    126 * 70: seventy[-&gt;&gt;];
    127 * 80: eighty[-&gt;&gt;];
    128 * 90: ninety[-&gt;&gt;];</pre>
    129 *
    130 * <p>In these rules, the <em>base value</em> is spelled out explicitly and set off from the
    131 * rule's output text with a colon. The rules are in a sorted list, and a rule is applicable
    132 * to all numbers from its own base value to one less than the next rule's base value. The
    133 * &quot;&gt;&gt;&quot; token is called a <em>substitution</em> and tells the formatter to
    134 * isolate the number's ones digit, format it using this same set of rules, and place the
    135 * result at the position of the &quot;&gt;&gt;&quot; token. Text in brackets is omitted if
    136 * the number being formatted is an even multiple of 10 (the hyphen is a literal hyphen; 24
    137 * is &quot;twenty-four,&quot; not &quot;twenty four&quot;).</p>
    138 *
    139 * <p>For even larger numbers, we can actually look up several parts of the number in the
    140 * list:</p>
    141 *
    142 * <pre>
    143 * 100: &lt;&lt; hundred[ &gt;&gt;];</pre>
    144 *
    145 * <p>The &quot;&lt;&lt;&quot; represents a new kind of substitution. The &lt;&lt; isolates
    146 * the hundreds digit (and any digits to its left), formats it using this same rule set, and
    147 * places the result where the &quot;&lt;&lt;&quot; was. Notice also that the meaning of
    148 * &gt;&gt; has changed: it now refers to both the tens and the ones digits. The meaning of
    149 * both substitutions depends on the rule's base value. The base value determines the rule's <em>divisor,</em>
    150 * which is the highest power of 10 that is less than or equal to the base value (the user
    151 * can change this). To fill in the substitutions, the formatter divides the number being
    152 * formatted by the divisor. The integral quotient is used to fill in the &lt;&lt;
    153 * substitution, and the remainder is used to fill in the &gt;&gt; substitution. The meaning
    154 * of the brackets changes similarly: text in brackets is omitted if the value being
    155 * formatted is an even multiple of the rule's divisor. The rules are applied recursively, so
    156 * if a substitution is filled in with text that includes another substitution, that
    157 * substitution is also filled in.</p>
    158 *
    159 * <p>This rule covers values up to 999, at which point we add another rule:</p>
    160 *
    161 * <pre>
    162 * 1000: &lt;&lt; thousand[ &gt;&gt;];</pre>
    163 *
    164 * <p>Just like the 100 rule, the meanings of the brackets and substitution tokens shift because the rule's
    165 * base value is a higher power of 10, changing the rule's divisor. This rule can actually be
    166 * used all the way up to 999,999. This allows us to finish out the rules as follows:</p>
    167 *
    168 * <pre>
    169 * 1,000,000: &lt;&lt; million[ &gt;&gt;];
    170 * 1,000,000,000: &lt;&lt; billion[ &gt;&gt;];
    171 * 1,000,000,000,000: &lt;&lt; trillion[ &gt;&gt;];
    172 * 1,000,000,000,000,000: OUT OF RANGE!;</pre>
    173 *
    174 * <p>Commas, periods, and spaces can be used in the base values to improve legibility and
    175 * are ignored by the rule parser. The last rule in the list is customarily treated as an
    176 * &quot;overflow rule&quot;, applying to everything from its base value on up, and often (as
    177 * in this example) being used to print out an error message or default representation.
    178 * Notice also that the size of the major groupings in large numbers is controlled by the
    179 * spacing of the rules: because in English we group numbers by thousand, the higher rules
    180 * are separated from each other by a factor of 1,000.</p>
    181 *
    182 * <p>To see how these rules actually work in practice, consider the following example.
    183 * Formatting 25,340 with this rule set would work like this:</p>
    184 *
    185 * <table style="border-collapse: collapse;">
    186 *   <tr>
    187 *     <td style="width: 257; vertical-align: top;"><strong>&lt;&lt; thousand &gt;&gt;</strong></td>
    188 *     <td style="width: 340; vertical-align: top;">[the rule whose base value is 1,000 is applicable to 25,340]</td>
    189 *   </tr>
    190 *   <tr>
    191 *     <td style="width: 257; vertical-align: top;"><strong>twenty-&gt;&gt;</strong> thousand &gt;&gt;</td>
    192 *     <td style="width: 340; vertical-align: top;">[25,340 over 1,000 is 25. The rule for 20 applies.]</td>
    193 *   </tr>
    194 *   <tr>
    195 *     <td style="width: 257; vertical-align: top;">twenty-<strong>five</strong> thousand &gt;&gt;</td>
    196 *     <td style="width: 340; vertical-align: top;">[25 mod 10 is 5. The rule for 5 is &quot;five.&quot;]</td>
    197 *   </tr>
    198 *   <tr>
    199 *     <td style="width: 257; vertical-align: top;">twenty-five thousand <strong>&lt;&lt; hundred &gt;&gt;</strong></td>
    200 *     <td style="width: 340; vertical-align: top;">[25,340 mod 1,000 is 340. The rule for 100 applies.]</td>
    201 *   </tr>
    202 *   <tr>
    203 *     <td style="width: 257; vertical-align: top;">twenty-five thousand <strong>three</strong> hundred &gt;&gt;</td>
    204 *     <td style="width: 340; vertical-align: top;">[340 over 100 is 3. The rule for 3 is &quot;three.&quot;]</td>
    205 *   </tr>
    206 *   <tr>
    207 *     <td style="width: 257; vertical-align: top;">twenty-five thousand three hundred <strong>forty</strong></td>
    208 *     <td style="width: 340; vertical-align: top;">[340 mod 100 is 40. The rule for 40 applies. Since 40 divides
    209 *     evenly by 10, the hyphen and substitution in the brackets are omitted.]</td>
    210 *   </tr>
    211 * </table>
    212 *
    213 * <p>The above syntax suffices only to format positive integers. To format negative numbers,
    214 * we add a special rule:</p>
    215 *
    216 * <pre>-x: minus &gt;&gt;;</pre>
    217 *
    218 * <p>This is called a <em>negative-number rule,</em> and is identified by &quot;-x&quot;
    219 * where the base value would be. This rule is used to format all negative numbers. The
    220 * &gt;&gt; token here means &quot;find the number's absolute value, format it with these
    221 * rules, and put the result here.&quot;</p>
    222 *
    223 * <p>We also add a special rule called a <em>fraction rule</em> for numbers with fractional
    224 * parts:</p>
    225 *
    226 * <pre>x.x: &lt;&lt; point &gt;&gt;;</pre>
    227 *
    228 * <p>This rule is used for all positive non-integers (negative non-integers pass through the
    229 * negative-number rule first and then through this rule). Here, the &lt;&lt; token refers to
    230 * the number's integral part, and the &gt;&gt; to the number's fractional part. The
    231 * fractional part is formatted as a series of single-digit numbers (e.g., 123.456 would be
    232 * formatted as &quot;one hundred twenty-three point four five six&quot;).</p>
    233 *
    234 * <p>To see how this rule syntax is applied to various languages, examine the resource data.</p>
    235 *
    236 * <p>There is actually much more flexibility built into the rule language than the
    237 * description above shows. A formatter may own multiple rule sets, which can be selected by
    238 * the caller, and which can use each other to fill in their substitutions. Substitutions can
    239 * also be filled in with digits, using a DecimalFormat object. There is syntax that can be
    240 * used to alter a rule's divisor in various ways. And there is provision for much more
    241 * flexible fraction handling. A complete description of the rule syntax follows:</p>
    242 *
    243 * <hr>
    244 *
    245 * <p>The description of a <code>RuleBasedNumberFormat</code>'s behavior consists of one or more <em>rule
    246 * sets.</em> Each rule set consists of a name, a colon, and a list of <em>rules</em>. A rule
    247 * set name must begin with a % sign. Rule sets with a name that begins with a single % sign
    248 * are <em>public</em>, and that name can be referenced to format and parse numbers.
    249 * Rule sets with names that begin with %% are <em>private.</em> They exist only for the use
    250 * of other rule sets. If a formatter only has one rule set, the name may be omitted.</p>
    251 *
    252 * <p>The user can also specify a special &quot;rule set&quot; named <code>%%lenient-parse</code>.
    253 * The body of <code>%%lenient-parse</code> isn't a set of number-formatting rules, but a <code>RuleBasedCollator</code>
    254 * description which is used to define equivalences for lenient parsing. For more information
    255 * on the syntax, see <code>RuleBasedCollator</code>. For more information on lenient parsing,
    256 * see <code>setLenientParse()</code>. <em>Note:</em> symbols that have syntactic meaning
    257 * in collation rules, such as '&amp;', have no particular meaning when appearing outside
    258 * of the <code>lenient-parse</code> rule set.</p>
    259 *
    260 * <p>The body of a rule set consists of an ordered, semicolon-delimited list of <em>rules.</em>
    261 * Internally, every rule has a base value, a divisor, rule text, and zero, one, or two <em>substitutions.</em>
    262 * These parameters are controlled by the description syntax, which consists of a <em>rule
    263 * descriptor,</em> a colon, and a <em>rule body.</em></p>
    264 *
    265 * <p>A rule descriptor can take one of the following forms (text in <em>italics</em> is the
    266 * name of a token):</p>
    267 *
    268 * <table style="border-collapse: collapse;">
    269 *   <tr>
    270 *     <th style="padding-left: 1em; padding-right: 1em;">Descriptor</th>
    271 *     <th>Description</th>
    272 *   </tr>
    273 *   <tr style="border-top: 1px solid black;">
    274 *     <td style="vertical-align: top;"><em>bv</em>:</td>
    275 *     <td style="vertical-align: top;"><em>bv</em> specifies the rule's base value. <em>bv</em> is a decimal
    276 *     number expressed using ASCII digits. <em>bv</em> may contain spaces, periods, and commas,
    277 *     which are ignored. The rule's divisor is the highest power of 10 less than or equal to
    278 *     the base value.</td>
    279 *   </tr>
    280 *   <tr style="border-top: 1px solid black;">
    281 *     <td style="vertical-align: top;"><em>bv</em>/<em>rad</em>:</td>
    282 *     <td style="vertical-align: top;"><em>bv</em> specifies the rule's base value. The rule's divisor is the
    283 *     highest power of <em>rad</em> less than or equal to the base value.</td>
    284 *   </tr>
    285 *   <tr style="border-top: 1px solid black;">
    286 *     <td style="vertical-align: top;"><em>bv</em>&gt;:</td>
    287 *     <td style="vertical-align: top;"><em>bv</em> specifies the rule's base value. To calculate the divisor,
    288 *     let the radix be 10, and the exponent be the highest exponent of the radix that yields a
    289 *     result less than or equal to the base value. Every &gt; character after the base value
    290 *     decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
    291 *     raised to the power of the exponent; otherwise, the divisor is 1.</td>
    292 *   </tr>
    293 *   <tr style="border-top: 1px solid black;">
    294 *     <td style="vertical-align: top;"><em>bv</em>/<em>rad</em>&gt;:</td>
    295 *     <td style="vertical-align: top;"><em>bv</em> specifies the rule's base value. To calculate the divisor,
    296 *     let the radix be <em>rad</em>, and the exponent be the highest exponent of the radix that
    297 *     yields a result less than or equal to the base value. Every &gt; character after the radix
    298 *     decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
    299 *     raised to the power of the exponent; otherwise, the divisor is 1.</td>
    300 *   </tr>
    301 *   <tr style="border-top: 1px solid black;">
    302 *     <td style="vertical-align: top;">-x:</td>
    303 *     <td style="vertical-align: top;">The rule is a negative-number rule.</td>
    304 *   </tr>
    305 *   <tr style="border-top: 1px solid black;">
    306 *     <td style="vertical-align: top;">x.x:</td>
    307 *     <td style="vertical-align: top;">The rule is an <em>improper fraction rule</em>. If the full stop in
    308 *     the middle of the rule name is replaced with the decimal point
    309 *     that is used in the language or DecimalFormatSymbols, then that rule will
    310 *     have precedence when formatting and parsing this rule. For example, some
    311 *     languages use the comma, and can thus be written as x,x instead. For example,
    312 *     you can use "x.x: &lt;&lt; point &gt;&gt;;x,x: &lt;&lt; comma &gt;&gt;;" to
    313 *     handle the decimal point that matches the language's natural spelling of
    314 *     the punctuation of either the full stop or comma.</td>
    315 *   </tr>
    316 *   <tr style="border-top: 1px solid black;">
    317 *     <td style="vertical-align: top;">0.x:</td>
    318 *     <td style="vertical-align: top;">The rule is a <em>proper fraction rule</em>. If the full stop in
    319 *     the middle of the rule name is replaced with the decimal point
    320 *     that is used in the language or DecimalFormatSymbols, then that rule will
    321 *     have precedence when formatting and parsing this rule. For example, some
    322 *     languages use the comma, and can thus be written as 0,x instead. For example,
    323 *     you can use "0.x: point &gt;&gt;;0,x: comma &gt;&gt;;" to
    324 *     handle the decimal point that matches the language's natural spelling of
    325 *     the punctuation of either the full stop or comma.</td>
    326 *   </tr>
    327 *   <tr style="border-top: 1px solid black;">
    328 *     <td style="vertical-align: top;">x.0:</td>
    329 *     <td style="vertical-align: top;">The rule is a <em>default rule</em>. If the full stop in
    330 *     the middle of the rule name is replaced with the decimal point
    331 *     that is used in the language or DecimalFormatSymbols, then that rule will
    332 *     have precedence when formatting and parsing this rule. For example, some
    333 *     languages use the comma, and can thus be written as x,0 instead. For example,
    334 *     you can use "x.0: &lt;&lt; point;x,0: &lt;&lt; comma;" to
    335 *     handle the decimal point that matches the language's natural spelling of
    336 *     the punctuation of either the full stop or comma.</td>
    337 *   </tr>
    338 *   <tr style="border-top: 1px solid black;">
    339 *     <td style="vertical-align: top;">Inf:</td>
    340 *     <td style="vertical-align: top;">The rule for infinity.</td>
    341 *   </tr>
    342 *   <tr style="border-top: 1px solid black;">
    343 *     <td style="vertical-align: top;">NaN:</td>
    344 *     <td style="vertical-align: top;">The rule for an IEEE 754 NaN (not a number).</td>
    345 *   </tr>
    346 *   <tr style="border-top: 1px solid black;">
    347 *     <td style="vertical-align: top;"><em>nothing</em></td>
    348 *     <td style="vertical-align: top;">If the rule's rule descriptor is left out, the base value is one plus the
    349 *     preceding rule's base value (or zero if this is the first rule in the list) in a normal
    350 *     rule set. In a fraction rule set, the base value is the same as the preceding rule's
    351 *     base value.</td>
    352 *   </tr>
    353 * </table>
    354 *
    355 * <p>A rule set may be either a regular rule set or a <em>fraction rule set,</em> depending
    356 * on whether it is used to format a number's integral part (or the whole number) or a
    357 * number's fractional part. Using a rule set to format a rule's fractional part makes it a
    358 * fraction rule set.</p>
    359 *
    360 * <p>Which rule is used to format a number is defined according to one of the following
    361 * algorithms: If the rule set is a regular rule set, do the following:
    362 *
    363 * <ul>
    364 *   <li>If the rule set includes a default rule (and the number was passed in as a <code>double</code>),
    365 *     use the default rule. If the number being formatted was passed in as a <code>long</code>,
    366 *     the default rule is ignored.</li>
    367 *   <li>If the number is negative, use the negative-number rule.</li>
    368 *   <li>If the number has a fractional part and is greater than 1, use the improper fraction
    369 *     rule.</li>
    370 *   <li>If the number has a fractional part and is between 0 and 1, use the proper fraction
    371 *     rule.</li>
    372 *   <li>Binary-search the rule list for the rule with the highest base value less than or equal
    373 *     to the number. If that rule has two substitutions, its base value is not an even multiple
    374 *     of its divisor, and the number <em>is</em> an even multiple of the rule's divisor, use the
    375 *     rule that precedes it in the rule list. Otherwise, use the rule itself.</li>
    376 * </ul>
    377 *
    378 * <p>If the rule set is a fraction rule set, do the following:
    379 *
    380 * <ul>
    381 *   <li>Ignore negative-number and fraction rules.</li>
    382 *   <li>For each rule in the list, multiply the number being formatted (which will always be
    383 *     between 0 and 1) by the rule's base value. Keep track of the distance between the result
    384 *     and the nearest integer.</li>
    385 *   <li>Use the rule that produced the result closest to zero in the above calculation. In the
    386 *     event of a tie or a direct hit, use the first matching rule encountered. (The idea here is
    387 *     to try each rule's base value as a possible denominator of a fraction. Whichever
    388 *     denominator produces the fraction closest in value to the number being formatted wins.) If
    389 *     the rule following the matching rule has the same base value, use it if the numerator of
    390 *     the fraction is anything other than 1; if the numerator is 1, use the original matching
    391 *     rule. (This is to allow singular and plural forms of the rule text without a lot of extra
    392 *     hassle.)</li>
    393 * </ul>
    394 *
    395 * <p>A rule's body consists of a string of characters terminated by a semicolon. The rule
    396 * may include zero, one, or two <em>substitution tokens,</em> and a range of text in
    397 * brackets. The brackets denote optional text (and may also include one or both
    398 * substitutions). The exact meanings of the substitution tokens, and under what conditions
    399 * optional text is omitted, depend on the syntax of the substitution token and the context.
    400 * The rest of the text in a rule body is literal text that is output when the rule matches
    401 * the number being formatted.</p>
    402 *
    403 * <p>A substitution token begins and ends with a <em>token character.</em> The token
    404 * character and the context together specify a mathematical operation to be performed on the
    405 * number being formatted. An optional <em>substitution descriptor</em> specifies how the
    406 * value resulting from that operation is used to fill in the substitution. The position of
    407 * the substitution token in the rule body specifies the location of the resultant text in
    408 * the original rule text.</p>
    409 *
    410 * <p>The meanings of the substitution token characters are as follows:</p>
    411 *
    412 * <table style="border-collapse: collapse;">
    413 *   <tr>
    414 *     <th>Syntax</th>
    415 *     <th>Usage</th>
    416 *     <th>Description</th>
    417 *   </tr>
    418 *   <tr style="border-top: 1px solid black;">
    419 *     <td style="white-space: nowrap;" rowspan="4">&gt;&gt;</td>
    420 *     <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in normal rule</td>
    421 *     <td>Divide the number by the rule's divisor and format the remainder</td>
    422 *   </tr>
    423 *   <tr>
    424 *     <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in negative-number rule</td>
    425 *     <td>Find the absolute value of the number and format the result</td>
    426 *   </tr>
    427 *   <tr>
    428 *     <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in fraction or default rule</td>
    429 *     <td>Isolate the number's fractional part and format it.</td>
    430 *   </tr>
    431 *   <tr>
    432 *     <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in rule in fraction rule set</td>
    433 *     <td>Not allowed.</td>
    434 *   </tr>
    435 *   <tr style="border-top: 1px solid black;">
    436 *     <td style="white-space: nowrap;" rowspan="2">&gt;&gt;&gt;</td>
    437 *     <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in normal rule</td>
    438 *     <td>Divide the number by the rule's divisor and format the remainder,
    439 *       but bypass the normal rule-selection process and just use the
    440 *       rule that precedes this one in this rule list.</td>
    441 *   </tr>
    442 *   <tr>
    443 *     <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in all other rules</td>
    444 *     <td>Not allowed.</td>
    445 *   </tr>
    446 *   <tr style="border-top: 1px solid black;">
    447 *     <td style="white-space: nowrap;" rowspan="4">&lt;&lt;</td>
    448 *     <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in normal rule</td>
    449 *     <td>Divide the number by the rule's divisor, perform floor() on the quotient,
    450 *         and format the resulting value.<br>
    451 *         If there is a DecimalFormat pattern between the &lt; characters and the
    452 *         rule does NOT also contain a &gt;&gt; substitution, we DON'T perform
    453 *         floor() on the quotient. The quotient is passed through to the DecimalFormat
    454 *         intact.  That is, for the value 1,900:<br>
    455 *         - "1/1000: &lt;&lt; thousand;" will produce "one thousand"<br>
    456 *         - "1/1000: &lt;0&lt; thousand;" will produce "2 thousand" (NOT "1 thousand")<br>
    457 *         - "1/1000: &lt;0&lt; seconds &gt;0&gt; milliseconds;" will produce "1 second 900 milliseconds"
    458 *     </td>
    459 *   </tr>
    460 *   <tr>
    461 *     <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in negative-number rule</td>
    462 *     <td>Not allowed.</td>
    463 *   </tr>
    464 *   <tr>
    465 *     <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in fraction or default rule</td>
    466 *     <td>Isolate the number's integral part and format it.</td>
    467 *   </tr>
    468 *   <tr>
    469 *     <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in rule in fraction rule set</td>
    470 *     <td>Multiply the number by the rule's base value and format the result.</td>
    471 *   </tr>
    472 *   <tr style="border-top: 1px solid black;">
    473 *     <td style="white-space: nowrap;">==</td>
    474 *     <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in all rule sets</td>
    475 *     <td>Format the number unchanged</td>
    476 *   </tr>
    477 *   <tr style="border-top: 1px solid black;">
    478 *     <td style="white-space: nowrap;" rowspan="6">[]<br/>[|]</td>
    479 *     <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in normal rule</td>
    480 *     <td>
    481 *       <ul>
    482 *         <li>When the number is not an even multiple of the rule's divisor, use the text and rules between the beginning square bracket,
    483 *         and the end square bracket or the | symbol.</li>
    484 *         <li>When the number is an even multiple of the rule's divisor, and no | symbol is used, omit the text.</li>
    485 *         <li>When the number is an even multiple of the rule's divisor, and | symbol is used, use the text and rules between the | symbol,
    486 *         and the end square bracket.</li>
    487 *       </ul>
    488 *     </td>
    489 *   </tr>
    490 *   <tr>
    491 *     <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in improper-fraction rule</td>
    492 *     <td>This syntax is the same as specifying both an x.x rule and a 0.x rule.
    493 *       <ul>
    494 *         <li>When the number is not between 0 and 1, use the text and rules between the beginning square bracket,
    495 *         and the end square bracket or the | symbol.</li>
    496 *         <li>When the number is between 0 and 1, and no | symbol is used, omit the text.</li>
    497 *         <li>When the number is between 0 and 1, and | symbol is used, use the text and rules between the | symbol,
    498 *         and the end square bracket.</li>
    499 *       </ul>
    500 *     </td>
    501 *   </tr>
    502 *   <tr>
    503 *     <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in default rule</td>
    504 *     <td>This syntax is the same as specifying both an x.x rule and an x.0 rule.
    505 *       <ul>
    506 *         <li>When the number is not an integer, use the text and rules between the beginning square bracket,
    507 *         and the end square bracket or the | symbol.</li>
    508 *         <li>When the number is an integer, and no | symbol is used, omit the text.</li>
    509 *         <li>When the number is an integer, and | symbol is used, use the text and rules between the | symbol,
    510 *         and the end square bracket.</li>
    511 *       </ul>
    512 *     </td>
    513 *   </tr>
    514 *   <tr>
    515 *     <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in rule in fraction rule set</td>
    516 *     <td>
    517 *       <ul>
    518 *         <li>When multiplying the number by the rule's base value does not yield 1, use the text and rules between the beginning square bracket,
    519 *         and the end square bracket or the | symbol.</li>
    520 *         <li>When multiplying the number by the rule's base value yields 1, and no | symbol is used, omit the text.</li>
    521 *         <li>When multiplying the number by the rule's base value yields 1, and | symbol is used, use the text and rules between the | symbol,
    522 *         and the end square bracket.</li>
    523 *       </ul>
    524 *     </td>
    525 *   </tr>
    526 *   <tr>
    527 *     <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in proper-fraction rule</td>
    528 *     <td>Not allowed.</td>
    529 *   </tr>
    530 *   <tr>
    531 *     <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in negative-number rule</td>
    532 *     <td>Not allowed.</td>
    533 *   </tr>
    534 *   <tr style="border-top: 1px solid black;">
    535 *     <td style="white-space: nowrap;">$(cardinal,<i>plural syntax</i>)$</td>
    536 *     <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in all rule sets</td>
    537 *     <td>This provides the ability to choose a word based on the number divided by the radix to the power of the
    538 *     exponent of the base value for the specified locale, which is normally equivalent to the &lt;&lt; value.
    539 *     This uses the cardinal plural rules from {@link PluralFormat}. All strings used in the plural format are treated
    540 *     as the same base value for parsing.</td>
    541 *   </tr>
    542 *   <tr style="border-top: 1px solid black;">
    543 *     <td style="white-space: nowrap;">$(ordinal,<i>plural syntax</i>)$</td>
    544 *     <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in all rule sets</td>
    545 *     <td>This provides the ability to choose a word based on the number divided by the radix to the power of the
    546 *     exponent of the base value for the specified locale, which is normally equivalent to the &lt;&lt; value.
    547 *     This uses the ordinal plural rules from {@link PluralFormat}. All strings used in the plural format are treated
    548 *     as the same base value for parsing.</td>
    549 *   </tr>
    550 * </table>
    551 *
    552 * <p>The substitution descriptor (i.e., the text between the token characters) may take one
    553 * of three forms:</p>
    554 *
    555 * <table style="border-collapse: collapse;">
    556 *   <tr>
    557 *     <th>Descriptor</th>
    558 *     <th>Description</th>
    559 *   </tr>
    560 *   <tr>
    561 *     <td style="vertical-align: top;">a rule set name</td>
    562 *     <td>Perform the mathematical operation on the number, and format the result using the
    563 *     named rule set.</td>
    564 *   </tr>
    565 *   <tr style="border-top: 1px solid black;">
    566 *     <td style="vertical-align: top;">a DecimalFormat pattern</td>
    567 *     <td>Perform the mathematical operation on the number, and format the result using a
    568 *     DecimalFormat with the specified pattern. The pattern must begin with 0 or #.</td>
    569 *   </tr>
    570 *   <tr style="border-top: 1px solid black;">
    571 *     <td style="vertical-align: top;">nothing</td>
    572 *     <td>Perform the mathematical operation on the number, and format the result using the rule
    573 *     set containing the current rule, except:<ul>
    574 *       <li>You can't have an empty substitution descriptor with a == substitution.</li>
    575 *       <li>If you omit the substitution descriptor in a &gt;&gt; substitution in a fraction rule,
    576 *         format the result one digit at a time using the rule set containing the current rule.</li>
    577 *       <li>If you omit the substitution descriptor in a &lt;&lt; substitution in a rule in a
    578 *         fraction rule set, format the result using the default rule set for this formatter.</li>
    579 *     </ul>
    580 *     </td>
    581 *   </tr>
    582 * </table>
    583 *
    584 * <p>Whitespace is ignored between a rule set name and a rule set body, between a rule
    585 * descriptor and a rule body, or between rules. If a rule body begins with an apostrophe,
    586 * the apostrophe is ignored, but all text after it becomes significant (this is how you can
    587 * have a rule's rule text begin with whitespace). There is no escape function: the semicolon
    588 * is not allowed in rule set names or in rule text, and the colon is not allowed in rule set
    589 * names. The characters beginning a substitution token are always treated as the beginning
    590 * of a substitution token.</p>
    591 *
    592 * <p>See the resource data and the demo program for annotated examples of real rule sets
    593 * using these features.</p>
    594 *
    595 * <p><em>User subclasses are not supported.</em> While clients may write
    596 * subclasses, such code will not necessarily work and will not be
    597 * guaranteed to work stably from release to release.</p>
    598 *
    599 * <p><b>Localizations</b></p>
    600 * <p>Constructors are available that allow the specification of localizations for the
    601 * public rule sets (and also allow more control over what public rule sets are available).
    602 * Localization data is represented as a textual description.  The description represents
    603 * an array of arrays of string.  The first element is an array of the public rule set names,
    604 * each of these must be one of the public rule set names that appear in the rules.  Only
    605 * names in this array will be treated as public rule set names by the API.  Each subsequent
    606 * element is an array of localizations of these names.  The first element of one of these
    607 * subarrays is the locale name, and the remaining elements are localizations of the
    608 * public rule set names, in the same order as they were listed in the first array.</p>
    609 * <p>In the syntax, angle brackets '<', '>' are used to delimit the arrays, and comma ',' is used
    610 * to separate elements of an array.  Whitespace is ignored, unless quoted.</p>
    611 * <p>For example:<pre>
    612 * < < %foo, %bar, %baz >,
    613 *   < en, Foo, Bar, Baz >,
    614 *   < fr, 'le Foo', 'le Bar', 'le Baz' >,
    615 *   < zh, \\u7532, \\u4e59, \\u4e19 > >
    616 * </pre></p>
    617 * @author Richard Gillam
    618 * @see NumberFormat
    619 * @see DecimalFormat
    620 * @see PluralFormat
    621 * @see PluralRules
    622 * @stable ICU 2.0
    623 */
    624 class U_I18N_API_CLASS RuleBasedNumberFormat : public NumberFormat {
    625 public:
    626 
    627  //-----------------------------------------------------------------------
    628  // constructors
    629  //-----------------------------------------------------------------------
    630 
    631    /**
    632     * Creates a RuleBasedNumberFormat that behaves according to the description
    633     * passed in.  The formatter uses the default locale.
    634     * @param rules A description of the formatter's desired behavior.
    635     * See the class documentation for a complete explanation of the description
    636     * syntax.
    637     * @param perror The parse error if an error was encountered.
    638     * @param status The status indicating whether the constructor succeeded.
    639     * @stable ICU 3.2
    640     */
    641    U_I18N_API RuleBasedNumberFormat(const UnicodeString& rules,
    642                                     UParseError& perror,
    643                                     UErrorCode& status);
    644 
    645    /**
    646     * Creates a RuleBasedNumberFormat that behaves according to the description
    647     * passed in.  The formatter uses the default locale.
    648     * <p>
    649     * The localizations data provides information about the public
    650     * rule sets and their localized display names for different
    651     * locales. The first element in the list is an array of the names
    652     * of the public rule sets.  The first element in this array is
    653     * the initial default ruleset.  The remaining elements in the
    654     * list are arrays of localizations of the names of the public
    655     * rule sets.  Each of these is one longer than the initial array,
    656     * with the first String being the ULocale ID, and the remaining
    657     * Strings being the localizations of the rule set names, in the
    658     * same order as the initial array.  Arrays are nullptr-terminated.
    659     * @param rules A description of the formatter's desired behavior.
    660     * See the class documentation for a complete explanation of the description
    661     * syntax.
    662     * @param localizations a list of localizations for the rule set
    663     * names in the description.  These will be copied by the constructor.
    664     * @param perror The parse error if an error was encountered.
    665     * @param status The status indicating whether the constructor succeeded.
    666     * @stable ICU 3.2
    667     */
    668    U_I18N_API RuleBasedNumberFormat(const UnicodeString& rules,
    669                                     const UnicodeString& localizations,
    670                                     UParseError& perror,
    671                                     UErrorCode& status);
    672 
    673    /**
    674     * Creates a RuleBasedNumberFormat that behaves according to the rules
    675     * passed in.  The formatter uses the specified locale to determine the
    676     * characters to use when formatting numerals, and to define equivalences
    677     * for lenient parsing.
    678     * @param rules The formatter rules.
    679     * See the class documentation for a complete explanation of the rule
    680     * syntax.
    681     * @param locale A locale that governs which characters are used for
    682     * formatting values in numerals and which characters are equivalent in
    683     * lenient parsing.
    684     * @param perror The parse error if an error was encountered.
    685     * @param status The status indicating whether the constructor succeeded.
    686     * @stable ICU 2.0
    687     */
    688    U_I18N_API RuleBasedNumberFormat(const UnicodeString& rules,
    689                                     const Locale& locale,
    690                                     UParseError& perror,
    691                                     UErrorCode& status);
    692 
    693    /**
    694     * Creates a RuleBasedNumberFormat that behaves according to the description
    695     * passed in.  The formatter uses the specified locale.
    696     * <p>
    697     * The localizations data provides information about the public
    698     * rule sets and their localized display names for different
    699     * locales. The first element in the list is an array of the names
    700     * of the public rule sets.  The first element in this array is
    701     * the initial default ruleset.  The remaining elements in the
    702     * list are arrays of localizations of the names of the public
    703     * rule sets.  Each of these is one longer than the initial array,
    704     * with the first String being the ULocale ID, and the remaining
    705     * Strings being the localizations of the rule set names, in the
    706     * same order as the initial array.  Arrays are nullptr-terminated.
    707     * @param rules A description of the formatter's desired behavior.
    708     * See the class documentation for a complete explanation of the description
    709     * syntax.
    710     * @param localizations a list of localizations for the rule set
    711     * names in the description.  These will be copied by the constructor.
    712     * @param locale A locale that governs which characters are used for
    713     * formatting values in numerals and which characters are equivalent in
    714     * lenient parsing.
    715     * @param perror The parse error if an error was encountered.
    716     * @param status The status indicating whether the constructor succeeded.
    717     * @stable ICU 3.2
    718     */
    719    U_I18N_API RuleBasedNumberFormat(const UnicodeString& rules,
    720                                     const UnicodeString& localizations,
    721                                     const Locale& locale,
    722                                     UParseError& perror,
    723                                     UErrorCode& status);
    724 
    725  /**
    726   * Creates a RuleBasedNumberFormat from a predefined ruleset.  The selector
    727   * code chooses among four possible predefined formats: spellout, ordinal,
    728   * duration, and numbering system.
    729   * @param tag A selector code specifying which kind of formatter to create for that
    730   * locale.  There are four legal values: URBNF_SPELLOUT, which creates a formatter that
    731   * spells out a value in words in the desired language, URBNF_ORDINAL, which attaches
    732   * an ordinal suffix from the desired language to the end of a number (e.g. "123rd"),
    733   * URBNF_DURATION, which formats a duration in seconds as hours, minutes, and seconds always rounding down,
    734   * and URBNF_NUMBERING_SYSTEM, which is used to invoke rules for alternate numbering
    735   * systems such as the Hebrew numbering system, or for Roman Numerals, etc.
    736   * NOTE: If you use URBNF_NUMBERING_SYSTEM, you must also call setDefaultRuleSet() to
    737   * specify the exact numbering system you want to use.  If you want the default numbering system
    738   * for the locale, call NumberFormat::createInstance() instead of creating a RuleBasedNumberFormat directly.
    739   * @param locale The locale for the formatter.
    740   * @param status The status indicating whether the constructor succeeded.
    741   * @stable ICU 2.0
    742   */
    743  U_I18N_API RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& locale, UErrorCode& status);
    744 
    745  //-----------------------------------------------------------------------
    746  // boilerplate
    747  //-----------------------------------------------------------------------
    748 
    749  /**
    750   * Copy constructor
    751   * @param rhs    the object to be copied from.
    752   * @stable ICU 2.6
    753   */
    754  U_I18N_API RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs);
    755 
    756  /**
    757   * Assignment operator
    758   * @param rhs    the object to be copied from.
    759   * @stable ICU 2.6
    760   */
    761  U_I18N_API RuleBasedNumberFormat& operator=(const RuleBasedNumberFormat& rhs);
    762 
    763  /**
    764   * Release memory allocated for a RuleBasedNumberFormat when you are finished with it.
    765   * @stable ICU 2.6
    766   */
    767  U_I18N_API virtual ~RuleBasedNumberFormat();
    768 
    769  /**
    770   * Clone this object polymorphically.  The caller is responsible
    771   * for deleting the result when done.
    772   * @return  A copy of the object.
    773   * @stable ICU 2.6
    774   */
    775  U_I18N_API virtual RuleBasedNumberFormat* clone() const override;
    776 
    777  /**
    778   * Return true if the given Format objects are semantically equal.
    779   * Objects of different subclasses are considered unequal.
    780   * @param other    the object to be compared with.
    781   * @return        true if the given Format objects are semantically equal.
    782   * @stable ICU 2.6
    783   */
    784  U_I18N_API virtual bool operator==(const Format& other) const override;
    785 
    786 //-----------------------------------------------------------------------
    787 // public API functions
    788 //-----------------------------------------------------------------------
    789 
    790  /**
    791   * return the rules that were provided to the RuleBasedNumberFormat.
    792   * @return the result String that was passed in
    793   * @stable ICU 2.0
    794   */
    795  U_I18N_API virtual UnicodeString getRules() const;
    796 
    797  /**
    798   * Return the number of public rule set names.
    799   * @return the number of public rule set names.
    800   * @stable ICU 2.0
    801   */
    802  U_I18N_API virtual int32_t getNumberOfRuleSetNames() const;
    803 
    804  /**
    805   * Return the name of the index'th public ruleSet.  If index is not valid,
    806   * the function returns null.
    807   * @param index the index of the ruleset
    808   * @return the name of the index'th public ruleSet.
    809   * @stable ICU 2.0
    810   */
    811  U_I18N_API virtual UnicodeString getRuleSetName(int32_t index) const;
    812 
    813  /**
    814   * Return the number of locales for which we have localized rule set display names.
    815   * @return the number of locales for which we have localized rule set display names.
    816   * @stable ICU 3.2
    817   */
    818  U_I18N_API virtual int32_t getNumberOfRuleSetDisplayNameLocales() const;
    819 
    820  /**
    821   * Return the index'th display name locale.
    822   * @param index the index of the locale
    823   * @param status set to a failure code when this function fails
    824   * @return the locale
    825   * @see #getNumberOfRuleSetDisplayNameLocales
    826   * @stable ICU 3.2
    827   */
    828  U_I18N_API virtual Locale getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const;
    829 
    830    /**
    831     * Return the display name of the index'th rule set for the provided locale.  Indices are in
    832     * the same order as for getRuleSetName.  The locale is matched against the locales for
    833     * which there is display name data, using normal fallback rules.  If no locale matches,
    834     * the default display name is returned.  (This is the internal rule set name minus
    835     * the leading '%'.)
    836     * @param index the index of the rule set
    837     * @param locale the locale (returned by getRuleSetDisplayNameLocale) for which the localized
    838     * display name is desired
    839     * @return the display name for the given index, which might be bogus if there is an error
    840     * @see #getRuleSetName
    841     * @stable ICU 3.2
    842     */
    843    U_I18N_API virtual UnicodeString getRuleSetDisplayName(int32_t index,
    844                                                           const Locale& locale = Locale::getDefault());
    845 
    846    /**
    847     * Return the rule set display name for the provided rule set and locale.
    848     * The locale is matched against the locales for which there is display name data, using
    849     * normal fallback rules.  If no locale matches, the default display name is returned.
    850     * @return the display name for the rule set
    851     * @stable ICU 3.2
    852     * @see #getRuleSetDisplayName
    853     */
    854    U_I18N_API virtual UnicodeString getRuleSetDisplayName(const UnicodeString& ruleSetName,
    855                                                           const Locale& locale = Locale::getDefault());
    856 
    857 
    858  using NumberFormat::format;
    859 
    860  /**
    861   * Formats the specified 32-bit number using the default ruleset.
    862   * @param number The number to format.
    863   * @param toAppendTo the string that will hold the (appended) result
    864   * @param pos the fieldposition
    865   * @return A textual representation of the number.
    866   * @stable ICU 2.0
    867   */
    868  U_I18N_API virtual UnicodeString& format(int32_t number,
    869                                           UnicodeString& toAppendTo,
    870                                           FieldPosition& pos) const override;
    871 
    872  /**
    873   * Formats the specified 64-bit number using the default ruleset.
    874   * @param number The number to format.
    875   * @param toAppendTo the string that will hold the (appended) result
    876   * @param pos the fieldposition
    877   * @return A textual representation of the number.
    878   * @stable ICU 2.1
    879   */
    880  U_I18N_API virtual UnicodeString& format(int64_t number,
    881                                           UnicodeString& toAppendTo,
    882                                           FieldPosition& pos) const override;
    883  /**
    884   * Formats the specified number using the default ruleset.
    885   * @param number The number to format.
    886   * @param toAppendTo the string that will hold the (appended) result
    887   * @param pos the fieldposition
    888   * @return A textual representation of the number.
    889   * @stable ICU 2.0
    890   */
    891  U_I18N_API virtual UnicodeString& format(double number,
    892                                           UnicodeString& toAppendTo,
    893                                           FieldPosition& pos) const override;
    894 
    895  /**
    896   * Formats the specified number using the named ruleset.
    897   * @param number The number to format.
    898   * @param ruleSetName The name of the rule set to format the number with.
    899   * This must be the name of a valid public rule set for this formatter.
    900   * @param toAppendTo the string that will hold the (appended) result
    901   * @param pos the fieldposition
    902   * @param status the status
    903   * @return A textual representation of the number.
    904   * @stable ICU 2.0
    905   */
    906  U_I18N_API virtual UnicodeString& format(int32_t number,
    907                                           const UnicodeString& ruleSetName,
    908                                           UnicodeString& toAppendTo,
    909                                           FieldPosition& pos,
    910                                           UErrorCode& status) const;
    911  /**
    912   * Formats the specified 64-bit number using the named ruleset.
    913   * @param number The number to format.
    914   * @param ruleSetName The name of the rule set to format the number with.
    915   * This must be the name of a valid public rule set for this formatter.
    916   * @param toAppendTo the string that will hold the (appended) result
    917   * @param pos the fieldposition
    918   * @param status the status
    919   * @return A textual representation of the number.
    920   * @stable ICU 2.1
    921   */
    922  U_I18N_API virtual UnicodeString& format(int64_t number,
    923                                           const UnicodeString& ruleSetName,
    924                                           UnicodeString& toAppendTo,
    925                                           FieldPosition& pos,
    926                                           UErrorCode& status) const;
    927  /**
    928   * Formats the specified number using the named ruleset.
    929   * @param number The number to format.
    930   * @param ruleSetName The name of the rule set to format the number with.
    931   * This must be the name of a valid public rule set for this formatter.
    932   * @param toAppendTo the string that will hold the (appended) result
    933   * @param pos the fieldposition
    934   * @param status the status
    935   * @return A textual representation of the number.
    936   * @stable ICU 2.0
    937   */
    938  U_I18N_API virtual UnicodeString& format(double number,
    939                                           const UnicodeString& ruleSetName,
    940                                           UnicodeString& toAppendTo,
    941                                           FieldPosition& pos,
    942                                           UErrorCode& status) const;
    943 
    944 protected:
    945    /**
    946     * Format a decimal number.
    947     * The number is a DecimalQuantity wrapper onto a floating point decimal number.
    948     * The default implementation in NumberFormat converts the decimal number
    949     * to a double and formats that.  Subclasses of NumberFormat that want
    950     * to specifically handle big decimal numbers must override this method.
    951     * class DecimalFormat does so.
    952     *
    953     * @param number    The number, a DecimalQuantity holding a decimal floating point value.
    954     * @param appendTo  Output parameter to receive result.
    955     *                  Result is appended to existing contents.
    956     * @param pos       On input: an alignment field, if desired.
    957     *                  On output: the offsets of the alignment field.
    958     * @param status    Output param filled with success/failure status.
    959     * @return          Reference to 'appendTo' parameter.
    960     * @internal
    961     */
    962    virtual UnicodeString& format(const number::impl::DecimalQuantity &number,
    963                                  UnicodeString& appendTo,
    964                                  FieldPosition& pos,
    965                                  UErrorCode& status) const override;
    966 public:
    967 
    968  using NumberFormat::parse;
    969 
    970  /**
    971   * Parses the specified string, beginning at the specified position, according
    972   * to this formatter's rules.  This will match the string against all of the
    973   * formatter's public rule sets and return the value corresponding to the longest
    974   * parseable substring.  This function's behavior is affected by the lenient
    975   * parse mode.
    976   * @param text The string to parse
    977   * @param result the result of the parse, either a double or a long.
    978   * @param parsePosition On entry, contains the position of the first character
    979   * in "text" to examine.  On exit, has been updated to contain the position
    980   * of the first character in "text" that wasn't consumed by the parse.
    981   * @see #setLenient
    982   * @stable ICU 2.0
    983   */
    984  U_I18N_API virtual void parse(const UnicodeString& text,
    985                                Formattable& result,
    986                                ParsePosition& parsePosition) const override;
    987 
    988 #if !UCONFIG_NO_COLLATION
    989 
    990  /**
    991   * Turns lenient parse mode on and off.
    992   *
    993   * When in lenient parse mode, the formatter uses a Collator for parsing the text.
    994   * Only primary differences are treated as significant.  This means that case
    995   * differences, accent differences, alternate spellings of the same letter
    996   * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in
    997   * matching the text.  In many cases, numerals will be accepted in place of words
    998   * or phrases as well.
    999   *
   1000   * For example, all of the following will correctly parse as 255 in English in
   1001   * lenient-parse mode:
   1002   * <br>"two hundred fifty-five"
   1003   * <br>"two hundred fifty five"
   1004   * <br>"TWO HUNDRED FIFTY-FIVE"
   1005   * <br>"twohundredfiftyfive"
   1006   * <br>"2 hundred fifty-5"
   1007   *
   1008   * The Collator used is determined by the locale that was
   1009   * passed to this object on construction.  The description passed to this object
   1010   * on construction may supply additional collation rules that are appended to the
   1011   * end of the default collator for the locale, enabling additional equivalences
   1012   * (such as adding more ignorable characters or permitting spelled-out version of
   1013   * symbols; see the demo program for examples).
   1014   *
   1015   * It's important to emphasize that even strict parsing is relatively lenient: it
   1016   * will accept some text that it won't produce as output.  In English, for example,
   1017   * it will correctly parse "two hundred zero" and "fifteen hundred".
   1018   *
   1019   * @param enabled If true, turns lenient-parse mode on; if false, turns it off.
   1020   * @see RuleBasedCollator
   1021   * @stable ICU 2.0
   1022   */
   1023  U_I18N_API virtual void setLenient(UBool enabled) override;
   1024 
   1025  /**
   1026   * Returns true if lenient-parse mode is turned on.  Lenient parsing is off
   1027   * by default.
   1028   * @return true if lenient-parse mode is turned on.
   1029   * @see #setLenient
   1030   * @stable ICU 2.0
   1031   */
   1032  U_I18N_API virtual inline UBool isLenient() const override;
   1033 
   1034 #endif
   1035 
   1036  /**
   1037   * Override the default rule set to use.  If ruleSetName is null, reset
   1038   * to the initial default rule set.  If the rule set is not a public rule set name,
   1039   * U_ILLEGAL_ARGUMENT_ERROR is returned in status.
   1040   * @param ruleSetName the name of the rule set, or null to reset the initial default.
   1041   * @param status set to failure code when a problem occurs.
   1042   * @stable ICU 2.6
   1043   */
   1044  U_I18N_API virtual void setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status);
   1045 
   1046  /**
   1047   * Return the name of the current default rule set.  If the current rule set is
   1048   * not public, returns a bogus (and empty) UnicodeString.
   1049   * @return the name of the current default rule set
   1050   * @stable ICU 3.0
   1051   */
   1052  U_I18N_API virtual UnicodeString getDefaultRuleSetName() const;
   1053 
   1054  /**
   1055   * Set a particular UDisplayContext value in the formatter, such as
   1056   * UDISPCTX_CAPITALIZATION_FOR_STANDALONE. Note: For getContext, see
   1057   * NumberFormat.
   1058   * @param value The UDisplayContext value to set.
   1059   * @param status Input/output status. If at entry this indicates a failure
   1060   *               status, the function will do nothing; otherwise this will be
   1061   *               updated with any new status from the function. 
   1062   * @stable ICU 53
   1063   */
   1064  U_I18N_API virtual void setContext(UDisplayContext value, UErrorCode& status) override;
   1065 
   1066    /**
   1067     * Get the rounding mode.
   1068     * @return A rounding mode
   1069     * @stable ICU 60
   1070     */
   1071    U_I18N_API virtual ERoundingMode getRoundingMode() const override;
   1072 
   1073    /**
   1074     * Set the rounding mode.
   1075     * @param roundingMode A rounding mode
   1076     * @stable ICU 60
   1077     */
   1078    U_I18N_API virtual void setRoundingMode(ERoundingMode roundingMode) override;
   1079 
   1080 public:
   1081    /**
   1082     * ICU "poor man's RTTI", returns a UClassID for this class.
   1083     *
   1084     * @stable ICU 2.8
   1085     */
   1086    U_I18N_API static UClassID getStaticClassID();
   1087 
   1088    /**
   1089     * ICU "poor man's RTTI", returns a UClassID for the actual class.
   1090     *
   1091     * @stable ICU 2.8
   1092     */
   1093    U_I18N_API virtual UClassID getDynamicClassID() const override;
   1094 
   1095    /**
   1096     * Sets the decimal format symbols, which is generally not changed
   1097     * by the programmer or user. The formatter takes ownership of
   1098     * symbolsToAdopt; the client must not delete it.
   1099     *
   1100     * @param symbolsToAdopt DecimalFormatSymbols to be adopted.
   1101     * @stable ICU 49
   1102     */
   1103    U_I18N_API virtual void adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt);
   1104 
   1105    /**
   1106     * Sets the decimal format symbols, which is generally not changed
   1107     * by the programmer or user. A clone of the symbols is created and
   1108     * the symbols object is _not_ adopted; the client is still responsible for
   1109     * deleting it.
   1110     *
   1111     * @param symbols DecimalFormatSymbols.
   1112     * @stable ICU 49
   1113     */
   1114    U_I18N_API virtual void setDecimalFormatSymbols(const DecimalFormatSymbols& symbols);
   1115 
   1116 private:
   1117    RuleBasedNumberFormat() = delete; // default construction is explicitly disallowed
   1118 
   1119    // Private constructor: refs `localizations` when it is not nullptr, so the
   1120    // caller must deref its own reference afterwards for the formatter to end up adopting it.
   1121    RuleBasedNumberFormat(const UnicodeString& description, LocalizationInfo* localizations,
   1122              const Locale& locale, UParseError& perror, UErrorCode& status);
   1123 
   1124    void init(const UnicodeString& rules, LocalizationInfo* localizations, UParseError& perror, UErrorCode& status); // NOTE(review): presumably the shared setup path for `rules`; definition not in this header -- confirm
   1125    void initCapitalizationContextInfo(const Locale& thelocale); // NOTE(review): presumably fills the capitalization* members for `thelocale` -- confirm
   1126    void dispose(); // NOTE(review): presumably releases the members owned by this object -- confirm
   1127    void stripWhitespace(UnicodeString& src); // takes `src` by non-const reference, so it is able to edit the string in place
   1128    void initDefaultRuleSet(); // NOTE(review): presumably chooses the value of defaultRuleSet -- confirm
   1129    NFRuleSet* findRuleSet(const UnicodeString& name, UErrorCode& status) const; // looks up by `name`; `status` is an in/out UErrorCode. NOTE(review): result on a miss is not visible here -- confirm
   1130 
   1131    /* friend access: these classes may use the private members declared in this class */
   1132    friend class NFSubstitution;
   1133    friend class NFRule;
   1134    friend class NFRuleSet;
   1135    friend class FractionalPartSubstitution;
   1136 
   1137    inline NFRuleSet * getDefaultRuleSet() const; // defined inline after the class; returns the defaultRuleSet member
   1138    const RuleBasedCollator * getCollator() const; // NOTE(review): presumably exposes the `collator` member read-only -- confirm
   1139    DecimalFormatSymbols * initializeDecimalFormatSymbols(UErrorCode &status); // NOTE(review): initialize*/get* pairs below look like create-on-demand plus read-only access -- confirm
   1140    const DecimalFormatSymbols * getDecimalFormatSymbols() const;
   1141    NFRule * initializeDefaultInfinityRule(UErrorCode &status);
   1142    const NFRule * getDefaultInfinityRule() const;
   1143    NFRule * initializeDefaultNaNRule(UErrorCode &status);
   1144    const NFRule * getDefaultNaNRule() const;
   1145    PluralFormat *createPluralFormat(UPluralType pluralType, const UnicodeString &pattern, UErrorCode& status) const; // NOTE(review): presumably the caller owns the returned object -- confirm
   1146    UnicodeString& adjustForCapitalizationContext(int32_t startPos, UnicodeString& currentResult, UErrorCode& status) const; // takes and returns a UnicodeString reference
   1147    UnicodeString& format(int64_t number, NFRuleSet *ruleSet, UnicodeString& toAppendTo, UErrorCode& status) const; // private overload: formats an int64_t with an explicitly supplied rule set
   1148    void format(double number, NFRuleSet& rs, UnicodeString& toAppendTo, UErrorCode& status) const; // private overload: same for a double, rule set passed by reference, no return value
   1149 
   1150 private:
   1151    NFRuleSet **fRuleSets; // NOTE(review): presumably an array of rule-set pointers sized by numRuleSets -- confirm
   1152    UnicodeString* ruleSetDescriptions;
   1153    int32_t numRuleSets;
   1154    NFRuleSet *defaultRuleSet; // value handed back by getDefaultRuleSet()
   1155    Locale locale;
   1156    RuleBasedCollator* collator;
   1157    DecimalFormatSymbols* decimalFormatSymbols;
   1158    NFRule *defaultInfinityRule;
   1159    NFRule *defaultNaNRule;
   1160    ERoundingMode fRoundingMode; // NOTE(review): presumably written by setRoundingMode() -- confirm
   1161    UBool lenient; // value handed back by isLenient()
   1162    UnicodeString* lenientParseRules;
   1163    LocalizationInfo* localizations;
   1164    UnicodeString originalDescription;
   1165    UBool capitalizationInfoSet; // NOTE(review): the capitalization* members are presumably filled by initCapitalizationContextInfo() -- confirm
   1166    UBool capitalizationForUIListMenu;
   1167    UBool capitalizationForStandAlone;
   1168    BreakIterator* capitalizationBrkIter;
   1169 };
   1170 
   1171 // ---------------
   1172 
   1173 #if !UCONFIG_NO_COLLATION
   1174 
   1175 inline UBool // inline definition; only compiled inside the enclosing #if !UCONFIG_NO_COLLATION
   1176 RuleBasedNumberFormat::isLenient() const {
   1177    return lenient; // plain read of the `lenient` member
   1178 }
   1179 
   1180 #endif
   1181 
   1182 inline NFRuleSet* // inline definition of the accessor declared in the class's private section
   1183 RuleBasedNumberFormat::getDefaultRuleSet() const {
   1184    return defaultRuleSet; // returns the stored pointer itself; no copy is made
   1185 }
   1186 
   1187 U_NAMESPACE_END
   1188 
   1189 /* U_HAVE_RBNF */
   1190 #endif
   1191 
   1192 #endif /* U_SHOW_CPLUSPLUS_API */
   1193 
   1194 /* RBNF_H */
   1195 #endif
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE