tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

messagepattern.h (34600B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *   Copyright (C) 2011-2013, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 *   file name:  messagepattern.h
      9 *   encoding:   UTF-8
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2011mar14
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #ifndef __MESSAGEPATTERN_H__
     18 #define __MESSAGEPATTERN_H__
     19 
     20 /**
     21 * \file
     22 * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
     23 */
     24 
     25 #include "unicode/utypes.h"
     26 
     27 #if U_SHOW_CPLUSPLUS_API
     28 
     29 #if !UCONFIG_NO_FORMATTING
     30 
     31 #include "unicode/parseerr.h"
     32 #include "unicode/unistr.h"
     33 
     34 /**
     35 * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
     36 * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
     37 * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
     38 * <p>
     39 * A pair of adjacent apostrophes always results in a single apostrophe in the output,
     40 * even when the pair is between two single, text-quoting apostrophes.
     41 * <p>
     42 * The following table shows examples of desired MessageFormat.format() output
     43 * with the pattern strings that yield that output.
     44 * <p>
     45 * <table>
     46 *   <tr>
     47 *     <th>Desired output</th>
     48 *     <th>DOUBLE_OPTIONAL</th>
     49 *     <th>DOUBLE_REQUIRED</th>
     50 *   </tr>
     51 *   <tr>
     52 *     <td>I see {many}</td>
     53 *     <td>I see '{many}'</td>
     54 *     <td>(same)</td>
     55 *   </tr>
     56 *   <tr>
     57 *     <td>I said {'Wow!'}</td>
     58 *     <td>I said '{''Wow!''}'</td>
     59 *     <td>(same)</td>
     60 *   </tr>
     61 *   <tr>
     62 *     <td>I don't know</td>
     63 *     <td>I don't know OR<br> I don''t know</td>
     64 *     <td>I don''t know</td>
     65 *   </tr>
     66 * </table>
     67 * @stable ICU 4.8
     68 * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
     69 */
     70 enum UMessagePatternApostropheMode {
     71    /**
     72     * A literal apostrophe is represented by
     73     * either a single or a double apostrophe pattern character.
     74     * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
     75     * if it immediately precedes a curly brace {},
     76     * or a pipe symbol | if inside a choice format,
     77     * or a pound symbol # if inside a plural format.
     78     * <p>
     79     * This is the default behavior starting with ICU 4.8.
     80     * @stable ICU 4.8
     81     */
     82    UMSGPAT_APOS_DOUBLE_OPTIONAL,  // implicit value 0 (first enumerator)
     83    /**
     84     * A literal apostrophe must be represented by
     85     * a double apostrophe pattern character.
     86     * A single apostrophe always starts quoted literal text.
     87     * <p>
     88     * This is the behavior of ICU 4.6 and earlier, and of the JDK.
     89     * @stable ICU 4.8
     90     */
     91    UMSGPAT_APOS_DOUBLE_REQUIRED  // implicit value 1
     92 };
     93 /**
     94 * @stable ICU 4.8
     95 */
     96 typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;  // lets the type be named without the enum keyword
     97 
     98 /**
     99 * MessagePattern::Part type constants.
    100 * @stable ICU 4.8
    101 */
    102 enum UMessagePatternPartType {
    103    /**
    104     * Start of a message pattern (main or nested).
    105     * The length is 0 for the top-level message
    106     * and for a choice argument sub-message, otherwise 1 for the '{'.
    107     * The value indicates the nesting level, starting with 0 for the main message.
    108     * <p>
    109     * There is always a later MSG_LIMIT part.
    110     * @stable ICU 4.8
    111     */
    112    UMSGPAT_PART_TYPE_MSG_START,  // implicit value 0
    113    /**
    114     * End of a message pattern (main or nested).
    115     * The length is 0 for the top-level message and
    116     * the last sub-message of a choice argument,
    117     * otherwise 1 for the '}' or (in a choice argument style) the '|'.
    118     * The value indicates the nesting level, starting with 0 for the main message.
    119     * @stable ICU 4.8
    120     */
    121    UMSGPAT_PART_TYPE_MSG_LIMIT,  // implicit value 1
    122    /**
    123     * Indicates a substring of the pattern string which is to be skipped when formatting.
    124     * For example, an apostrophe that begins or ends quoted text
    125     * would be indicated with such a part.
    126     * The value is undefined and currently always 0.
    127     * @stable ICU 4.8
    128     */
    129    UMSGPAT_PART_TYPE_SKIP_SYNTAX,  // implicit value 2
    130    /**
    131     * Indicates that a syntax character needs to be inserted for auto-quoting.
    132     * The length is 0.
    133     * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
    134     * @stable ICU 4.8
    135     */
    136    UMSGPAT_PART_TYPE_INSERT_CHAR,  // implicit value 3
    137    /**
    138     * Indicates a syntactic (non-escaped) # symbol in a plural variant.
    139     * When formatting, replace this part's substring with the
    140     * (value-offset) for the plural argument value.
    141     * The value is undefined and currently always 0.
    142     * @stable ICU 4.8
    143     */
    144    UMSGPAT_PART_TYPE_REPLACE_NUMBER,  // implicit value 4
    145    /**
    146     * Start of an argument.
    147     * The length is 1 for the '{'.
    148     * The value is the ordinal value of the ArgType. Use getArgType().
    149     * <p>
    150     * This part is followed by either an ARG_NUMBER or ARG_NAME,
    151     * followed by optional argument sub-parts (see UMessagePatternArgType constants)
    152     * and finally an ARG_LIMIT part.
    153     * @stable ICU 4.8
    154     */
    155    UMSGPAT_PART_TYPE_ARG_START,  // implicit value 5
    156    /**
    157     * End of an argument.
    158     * The length is 1 for the '}'.
    159     * The value is the ordinal value of the ArgType. Use getArgType().
    160     * @stable ICU 4.8
    161     */
    162    UMSGPAT_PART_TYPE_ARG_LIMIT,  // implicit value 6
    163    /**
    164     * The argument number, provided by the value.
    165     * @stable ICU 4.8
    166     */
    167    UMSGPAT_PART_TYPE_ARG_NUMBER,  // implicit value 7
    168    /**
    169     * The argument name.
    170     * The value is undefined and currently always 0.
    171     * @stable ICU 4.8
    172     */
    173    UMSGPAT_PART_TYPE_ARG_NAME,  // implicit value 8
    174    /**
    175     * The argument type.
    176     * The value is undefined and currently always 0.
    177     * @stable ICU 4.8
    178     */
    179    UMSGPAT_PART_TYPE_ARG_TYPE,  // implicit value 9
    180    /**
    181     * The argument style text.
    182     * The value is undefined and currently always 0.
    183     * @stable ICU 4.8
    184     */
    185    UMSGPAT_PART_TYPE_ARG_STYLE,  // implicit value 10
    186    /**
    187     * A selector substring in a "complex" argument style.
    188     * The value is undefined and currently always 0.
    189     * @stable ICU 4.8
    190     */
    191    UMSGPAT_PART_TYPE_ARG_SELECTOR,  // implicit value 11
    192    /**
    193     * An integer value, for example the offset or an explicit selector value
    194     * in a PluralFormat style.
    195     * The part value is the integer value.
    196     * @stable ICU 4.8
    197     */
    198    UMSGPAT_PART_TYPE_ARG_INT,  // implicit value 12
    199    /**
    200     * A numeric value, for example the offset or an explicit selector value
    201     * in a PluralFormat style.
    202     * The part value is an index into an internal array of numeric values;
    203     * use getNumericValue().
    204     * @stable ICU 4.8
    205     */
    206    UMSGPAT_PART_TYPE_ARG_DOUBLE  // implicit value 13
    207 };
    208 /**
    209 * @stable ICU 4.8
    210 */
    211 typedef enum UMessagePatternPartType UMessagePatternPartType;  // lets the type be named without the enum keyword
    212 
    213 /**
    214 * Argument type constants.
    215 * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
    216 *
    217 * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
    218 * with a nesting level one greater than the surrounding message.
    219 * @stable ICU 4.8
    220 */
    221 enum UMessagePatternArgType {
    222    /**
    223     * The argument has no specified type.
    224     * @stable ICU 4.8
    225     */
    226    UMSGPAT_ARG_TYPE_NONE,  // implicit value 0
    227    /**
    228     * The argument has a "simple" type which is provided by the ARG_TYPE part.
    229     * An ARG_STYLE part might follow that.
    230     * @stable ICU 4.8
    231     */
    232    UMSGPAT_ARG_TYPE_SIMPLE,  // implicit value 1
    233    /**
    234     * The argument is a ChoiceFormat with one or more
    235     * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
    236     * @stable ICU 4.8
    237     */
    238    UMSGPAT_ARG_TYPE_CHOICE,  // implicit value 2
    239    /**
    240     * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
    241     * (e.g., offset:1)
    242     * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
    243     * If the selector has an explicit value (e.g., =2), then
    244     * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
    245     * Otherwise the message immediately follows the ARG_SELECTOR.
    246     * @stable ICU 4.8
    247     */
    248    UMSGPAT_ARG_TYPE_PLURAL,  // implicit value 3
    249    /**
    250     * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
    251     * @stable ICU 4.8
    252     */
    253    UMSGPAT_ARG_TYPE_SELECT,  // implicit value 4
    254    /**
    255     * The argument is an ordinal-number PluralFormat
    256     * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL.
    257     * @stable ICU 50
    258     */
    259    UMSGPAT_ARG_TYPE_SELECTORDINAL  // implicit value 5; the only enumerator here marked @stable ICU 50
    260 };
    261 /**
    262 * @stable ICU 4.8
    263 */
    264 typedef enum UMessagePatternArgType UMessagePatternArgType;  // lets the type be named without the enum keyword
    265 
    266 /**
    267 * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE
    268 * Returns true if the argument type has a plural style part sequence and semantics,
    269 * for example UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL.
    270 * @stable ICU 50
    271 */
    272 #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
    273    ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL) /* argType is expanded twice: pass an expression without side effects */
    274 
    275 enum {
    276    /**
    277     * Return value from MessagePattern.validateArgumentName() for when
    278     * the string is a valid "pattern identifier" but not a number.
    279     * @stable ICU 4.8
    280     */
    281    UMSGPAT_ARG_NAME_NOT_NUMBER=-1,
    282 
    283    /**
    284     * Return value from MessagePattern.validateArgumentName() for when
    285     * the string is invalid.
    286     * It might not be a valid "pattern identifier",
    287     * or it might have only ASCII digits but there is a leading zero or the number is too large.
    288     * @stable ICU 4.8
    289     */
    290    UMSGPAT_ARG_NAME_NOT_VALID=-2
    291 };
    292 
    293 /**
    294 * Special value that is returned by getNumericValue(Part) when no
    295 * numeric value is defined for a part.
    296 * @see MessagePattern.getNumericValue()
    297 * @stable ICU 4.8
    298 */
    299 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789)) /* integer-valued sentinel, so it is exactly representable as a double */
    300 
    301 U_NAMESPACE_BEGIN
    302 
    303 class MessagePatternDoubleList;
    304 class MessagePatternPartsList;
    305 
    306 /**
    307 * Parses and represents ICU MessageFormat patterns.
    308 * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
    309 * Used in the implementations of those classes as well as in tools
    310 * for message validation, translation and format conversion.
    311 * <p>
    312 * The parser handles all syntax relevant for identifying message arguments.
    313 * This includes "complex" arguments whose style strings contain
    314 * nested MessageFormat pattern substrings.
    315 * For "simple" arguments (with no nested MessageFormat pattern substrings),
    316 * the argument style is not parsed any further.
    317 * <p>
    318 * The parser handles named and numbered message arguments and allows both in one message.
    319 * <p>
    320 * Once a pattern has been parsed successfully, iterate through the parsed data
    321 * with countParts(), getPart() and related methods.
    322 * <p>
    323 * The data logically represents a parse tree, but is stored and accessed
    324 * as a list of "parts" for fast and simple parsing and to minimize object allocations.
    325 * Arguments and nested messages are best handled via recursion.
    326 * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
    327 * the index of the corresponding _LIMIT "part".
    328 * <p>
    329 * List of "parts":
    330 * <pre>
    331 * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
    332 * argument = noneArg | simpleArg | complexArg
    333 * complexArg = choiceArg | pluralArg | selectArg
    334 *
    335 * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
    336 * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
    337 * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
    338 * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
    339 * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
    340 *
    341 * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
    342 * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
    343 * selectStyle = (ARG_SELECTOR message)+
    344 * </pre>
    345 * <ul>
    346 *   <li>Literal output text is not represented directly by "parts" but accessed
    347 *       between parts of a message, from one part's getLimit() to the next part's getIndex().
    348 *   <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
    349 *   <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
    350 *       the less-than-or-equal-to sign (U+2264).
    351 *   <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
    352 *       The optional numeric Part between each (ARG_SELECTOR, message) pair
    353 *       is the value of an explicit-number selector like "=2",
    354 *       otherwise the selector is a non-numeric identifier.
    355 *   <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
    356 * </ul>
    357 * <p>
    358 * This class is not intended for public subclassing.
    359 *
    360 * @stable ICU 4.8
    361 */
    362 class U_COMMON_API MessagePattern : public UObject {
    363 public:
    364    /**
    365     * Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
    366     * @param errorCode Standard ICU error code. Its input value must
    367     *                  pass the U_SUCCESS() test, or else the function returns
    368     *                  immediately. Check for U_FAILURE() on output or use with
    369     *                  function chaining. (See User Guide for details.)
    370     * @stable ICU 4.8
    371     */
    372    MessagePattern(UErrorCode &errorCode);
    373 
    374    /**
    375     * Constructs an empty MessagePattern.
    376     * @param mode Explicit UMessagePatternApostropheMode.
    377     * @param errorCode Standard ICU error code. Its input value must
    378     *                  pass the U_SUCCESS() test, or else the function returns
    379     *                  immediately. Check for U_FAILURE() on output or use with
    380     *                  function chaining. (See User Guide for details.)
    381     * @stable ICU 4.8
    382     */
    383    MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
    384 
    385    /**
    386     * Constructs a MessagePattern with default UMessagePatternApostropheMode and
    387     * parses the MessageFormat pattern string.
    388     * @param pattern a MessageFormat pattern string
    389     * @param parseError Struct to receive information on the position
    390     *                   of an error within the pattern.
    391     *                   Can be nullptr.
    392     * @param errorCode Standard ICU error code. Its input value must
    393     *                  pass the U_SUCCESS() test, or else the function returns
    394     *                  immediately. Check for U_FAILURE() on output or use with
    395     *                  function chaining. (See User Guide for details.)
    396     * TODO: turn @throws into UErrorCode specifics?
    397     * @throws IllegalArgumentException for syntax errors in the pattern string
    398     * @throws IndexOutOfBoundsException if certain limits are exceeded
    399     *         (e.g., argument number too high, argument name too long, etc.)
    400     * @throws NumberFormatException if a number could not be parsed
    401     * @stable ICU 4.8
    402     */
    403    MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
    404 
    405    /**
    406     * Copy constructor.
    407     * @param other Object to copy.
    408     * @stable ICU 4.8
    409     */
    410    MessagePattern(const MessagePattern &other);
    411 
    412    /**
    413     * Assignment operator.
    414     * @param other Object to copy.
    415     * @return *this=other
    416     * @stable ICU 4.8
    417     */
    418    MessagePattern &operator=(const MessagePattern &other);
    419 
    420    /**
    421     * Destructor.
    422     * @stable ICU 4.8
    423     */
    424    virtual ~MessagePattern();
    425 
    426    /**
    427     * Parses a MessageFormat pattern string.
    428     * @param pattern a MessageFormat pattern string
    429     * @param parseError Struct to receive information on the position
    430     *                   of an error within the pattern.
    431     *                   Can be nullptr.
    432     * @param errorCode Standard ICU error code. Its input value must
    433     *                  pass the U_SUCCESS() test, or else the function returns
    434     *                  immediately. Check for U_FAILURE() on output or use with
    435     *                  function chaining. (See User Guide for details.)
    436     * @return *this
    437     * @throws IllegalArgumentException for syntax errors in the pattern string
    438     * @throws IndexOutOfBoundsException if certain limits are exceeded
    439     *         (e.g., argument number too high, argument name too long, etc.)
    440     * @throws NumberFormatException if a number could not be parsed
    441     * @stable ICU 4.8
    442     */
    443    MessagePattern &parse(const UnicodeString &pattern,
    444                          UParseError *parseError, UErrorCode &errorCode);
    445 
    446    /**
    447     * Parses a ChoiceFormat pattern string.
    448     * @param pattern a ChoiceFormat pattern string
    449     * @param parseError Struct to receive information on the position
    450     *                   of an error within the pattern.
    451     *                   Can be nullptr.
    452     * @param errorCode Standard ICU error code. Its input value must
    453     *                  pass the U_SUCCESS() test, or else the function returns
    454     *                  immediately. Check for U_FAILURE() on output or use with
    455     *                  function chaining. (See User Guide for details.)
    456     * @return *this
    457     * @throws IllegalArgumentException for syntax errors in the pattern string
    458     * @throws IndexOutOfBoundsException if certain limits are exceeded
    459     *         (e.g., argument number too high, argument name too long, etc.)
    460     * @throws NumberFormatException if a number could not be parsed
    461     * @stable ICU 4.8
    462     */
    463    MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
    464                                     UParseError *parseError, UErrorCode &errorCode);
    465 
    466    /**
    467     * Parses a PluralFormat pattern string.
    468     * @param pattern a PluralFormat pattern string
    469     * @param parseError Struct to receive information on the position
    470     *                   of an error within the pattern.
    471     *                   Can be nullptr.
    472     * @param errorCode Standard ICU error code. Its input value must
    473     *                  pass the U_SUCCESS() test, or else the function returns
    474     *                  immediately. Check for U_FAILURE() on output or use with
    475     *                  function chaining. (See User Guide for details.)
    476     * @return *this
    477     * @throws IllegalArgumentException for syntax errors in the pattern string
    478     * @throws IndexOutOfBoundsException if certain limits are exceeded
    479     *         (e.g., argument number too high, argument name too long, etc.)
    480     * @throws NumberFormatException if a number could not be parsed
    481     * @stable ICU 4.8
    482     */
    483    MessagePattern &parsePluralStyle(const UnicodeString &pattern,
    484                                     UParseError *parseError, UErrorCode &errorCode);
    485 
    486    /**
    487     * Parses a SelectFormat pattern string.
    488     * @param pattern a SelectFormat pattern string
    489     * @param parseError Struct to receive information on the position
    490     *                   of an error within the pattern.
    491     *                   Can be nullptr.
    492     * @param errorCode Standard ICU error code. Its input value must
    493     *                  pass the U_SUCCESS() test, or else the function returns
    494     *                  immediately. Check for U_FAILURE() on output or use with
    495     *                  function chaining. (See User Guide for details.)
    496     * @return *this
    497     * @throws IllegalArgumentException for syntax errors in the pattern string
    498     * @throws IndexOutOfBoundsException if certain limits are exceeded
    499     *         (e.g., argument number too high, argument name too long, etc.)
    500     * @throws NumberFormatException if a number could not be parsed
    501     * @stable ICU 4.8
    502     */
    503    MessagePattern &parseSelectStyle(const UnicodeString &pattern,
    504                                     UParseError *parseError, UErrorCode &errorCode);
    505 
    506    /**
    507     * Clears this MessagePattern.
    508     * countParts() will return 0.
    509     * @stable ICU 4.8
    510     */
    511    void clear();
    512 
    513    /**
    514     * Clears this MessagePattern and sets the UMessagePatternApostropheMode.
    515     * countParts() will return 0.
    516     * @param mode The new UMessagePatternApostropheMode.
    517     * @stable ICU 4.8
    518     */
    519    void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
    520        clear();  // drop any parsed pattern first; the mode is switched only afterwards
    521        aposMode=mode;
    522    }
    523 
    524    /**
    525     * @param other another object to compare with.
    526     * @return true if this object is equivalent to the other one.
    527     * @stable ICU 4.8
    528     */
    529    bool operator==(const MessagePattern &other) const;
    530 
    531    /**
    532     * @param other another object to compare with.
    533     * @return false if this object is equivalent to the other one.
    534     * @stable ICU 4.8
    535     */
    536    inline bool operator!=(const MessagePattern &other) const {
    537        return !(*this==other);  // inequality is defined as the negation of operator==
    538    }
    539 
    540    /**
    541     * @return A hash code for this object.
    542     * @stable ICU 4.8
    543     */
    544    int32_t hashCode() const;
    545 
    546    /**
    547     * @return this instance's UMessagePatternApostropheMode.
    548     * @stable ICU 4.8
    549     */
    550    UMessagePatternApostropheMode getApostropheMode() const {
    551        return aposMode;  // the member that clearPatternAndSetApostropheMode() assigns
    552    }
    553 
    554    // Java has package-private jdkAposMode() here.
    555    // In C++, this is declared in the MessageImpl class.
    556 
    557    /**
    558     * @return the parsed pattern string, by const reference (so never null; presumably empty if none was parsed).
    559     * @stable ICU 4.8
    560     */
    561    const UnicodeString &getPatternString() const {
    562        return msg;  // handed out by const reference, not copied
    563    }
    564 
    565    /**
    566     * Does the parsed pattern have named arguments like {first_name}?
    567     * @return true if the parsed pattern has at least one named argument.
    568     * @stable ICU 4.8
    569     */
    570    UBool hasNamedArguments() const {
    571        return hasArgNames;  // NOTE(review): presumably set while parsing; the assignment is not in this part of the header
    572    }
    573 
    574    /**
    575     * Does the parsed pattern have numbered arguments like {2}?
    576     * @return true if the parsed pattern has at least one numbered argument.
    577     * @stable ICU 4.8
    578     */
    579    UBool hasNumberedArguments() const {
    580        return hasArgNumbers;  // NOTE(review): presumably set while parsing; the assignment is not in this part of the header
    581    }
    582 
    583    /**
    584     * Validates and parses an argument name or argument number string.
    585     * An argument name must be a "pattern identifier", that is, it must contain
    586     * no Unicode Pattern_Syntax or Pattern_White_Space characters.
    587     * If it only contains ASCII digits, then it must be a small integer with no leading zero.
    588     * @param name Input string.
    589     * @return &gt;=0 if the name is a valid number,
    590     *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
    591     *         ARG_NAME_NOT_VALID (-2) if it is neither.
    592     * @stable ICU 4.8
    593     */
    594    static int32_t validateArgumentName(const UnicodeString &name);
    595 
    596    /**
    597     * Returns a version of the parsed pattern string where each ASCII apostrophe
    598     * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
    599     * <p>
    600     * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
    601     * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
    602     * @return the deep-auto-quoted version of the parsed pattern string.
    603     * @see MessageFormat.autoQuoteApostrophe()
    604     * @stable ICU 4.8
    605     */
    606    UnicodeString autoQuoteApostropheDeep() const;
    607 
    608    class Part;
    609 
    610    /**
    611     * Returns the number of "parts" created by parsing the pattern string.
    612     * Returns 0 if no pattern has been parsed or clear() was called.
    613     * @return the number of pattern parts.
    614     * @stable ICU 4.8
    615     */
    616    int32_t countParts() const {
    617        return partsLength;  // the count is read from a member, not recomputed here
    618    }
    619 
    620    /**
    621     * Gets the i-th pattern "part".
    622     * @param i The index of the Part data. (0..countParts()-1)
    623     * @return the i-th pattern "part".
    624     * @stable ICU 4.8
    625     */
    626    const Part &getPart(int32_t i) const {
    627        return parts[i];  // NOTE(review): i is used as-is; no range check is visible in this function
    628    }
    629 
    630    /**
    631     * Returns the UMessagePatternPartType of the i-th pattern "part".
    632     * Convenience method for getPart(i).getType().
    633     * @param i The index of the Part data. (0..countParts()-1)
    634     * @return The UMessagePatternPartType of the i-th Part.
    635     * @stable ICU 4.8
    636     */
    637    UMessagePatternPartType getPartType(int32_t i) const {
    638        return getPart(i).getType();  // go through the Part accessor, as the doc comment describes
    639    }
    640 
    641    /**
    642     * Returns the pattern index of the specified pattern "part".
    643     * Convenience method for getPart(partIndex).getIndex().
    644     * @param partIndex The index of the Part data. (0..countParts()-1)
    645     * @return The pattern index of this Part.
    646     * @stable ICU 4.8
    647     */
    648    int32_t getPatternIndex(int32_t partIndex) const {
    649        return getPart(partIndex).getIndex();  // go through the Part accessor, as the doc comment describes
    650    }
    651 
    652    /**
    653     * Returns the substring of the pattern string indicated by the Part.
    654     * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
    655     * @param part a part of this MessagePattern.
    656     * @return the substring associated with part.
    657     * @stable ICU 4.8
    658     */
    659    UnicodeString getSubstring(const Part &part) const {
    660        return msg.tempSubString(part.getIndex(), part.getLength());  // start offset and length both come from the Part
    661    }
    662 
    663    /**
    664     * Compares the part's substring with the input string s.
    665     * @param part a part of this MessagePattern.
    666     * @param s a string.
    667     * @return true if getSubstring(part).equals(s).
    668     * @stable ICU 4.8
    669     */
    670    UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
    671        return msg.compare(part.getIndex(), part.getLength(), s)==0;  // a zero comparison result is reported as a match
    672    }
    673 
    674    /**
    675     * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
    676     * @param part a part of this MessagePattern.
    677     * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
    678     * @stable ICU 4.8
    679     */
    680    double getNumericValue(const Part &part) const;
    681 
    682    /**
    683     * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
    684     * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
    685     * @return the "offset:" value.
    686     * @stable ICU 4.8
    687     */
    688    double getPluralOffset(int32_t pluralStart) const;
    689 
    690    /**
    691     * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
    692     * @param start The index of some Part data (0..countParts()-1);
    693     *        this Part should be of Type ARG_START or MSG_START.
    694     * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
    695     *         or start itself if getPartType(start)!=ARG|MSG_START.
    696     * @stable ICU 4.8
    697     */
    698    int32_t getLimitPartIndex(int32_t start) const {
    699        const int32_t limitIndex=getPart(start).limitPartIndex;
    700        // The documented contract returns start itself for a part that is
    701        // not an ARG|MSG_START; in this code that case is a stored limit
    702        // index smaller than start.
    703        return limitIndex<start ? start : limitIndex;
    704    }
    705 
    706    /**
    707     * A message pattern "part", representing a pattern parsing event.
    708     * There is a part for the start and end of a message or argument,
    709     * for quoting and escaping of and with ASCII apostrophes,
    710     * and for syntax elements of "complex" arguments.
    711     * @stable ICU 4.8
    712     */
    713    class Part : public UMemory {
    714    public:
    715        /**
    716         * Default constructor, do not use.
    717         * @internal
    718         */
    719        Part() {}  // empty body and no initializer list: this constructor itself assigns no field
    720 
    721        /**
    722         * Returns the type of this part.
    723         * @return the part type.
    724         * @stable ICU 4.8
    725         */
    726        UMessagePatternPartType getType() const {
    727            return type;
    728        }
    729 
    730        /**
    731         * Returns the pattern string index associated with this Part.
    732         * @return this part's pattern string index.
    733         * @stable ICU 4.8
    734         */
    735        int32_t getIndex() const {
    736            return index;
    737        }
    738 
    739        /**
    740         * Returns the length of the pattern substring associated with this Part.
    741         * This is 0 for some parts.
    742         * @return this part's pattern substring length.
    743         * @stable ICU 4.8
    744         */
    745        int32_t getLength() const {
    746            return length;
    747        }
    748 
    749        /**
    750         * Returns the pattern string limit (exclusive-end) index associated with this Part.
    751         * Convenience method for getIndex()+getLength().
    752         * @return this part's pattern string limit index, same as getIndex()+getLength().
    753         * @stable ICU 4.8
    754         */
    755        int32_t getLimit() const {
    756            return index+length;
    757        }
    758 
    759        /**
    760         * Returns a value associated with this part.
    761         * See the documentation of each part type for details.
    762         * @return the part value.
    763         * @stable ICU 4.8
    764         */
    765        int32_t getValue() const {
    766            return value;
    767        }
    768 
    769        /**
    770         * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
    771         * otherwise UMSGPAT_ARG_TYPE_NONE.
    772         * @return the argument type for this part.
    773         * @stable ICU 4.8
    774         */
    775        UMessagePatternArgType getArgType() const {
    776            UMessagePatternPartType msgType=getType();
    777            if(msgType ==UMSGPAT_PART_TYPE_ARG_START || msgType ==UMSGPAT_PART_TYPE_ARG_LIMIT) {
    778                return static_cast<UMessagePatternArgType>(value);
    779            } else {
    780                return UMSGPAT_ARG_TYPE_NONE;
    781            }
    782        }
    783 
    784        /**
    785         * Indicates whether the Part type has a numeric value.
    786         * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue().
    787         * @param type The Part type to be tested.
    788         * @return true if the Part type has a numeric value.
    789         * @stable ICU 4.8
    790         */
    791        static UBool hasNumericValue(UMessagePatternPartType type) {
    792            return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
    793        }
    794 
    795        /**
    796         * @param other another object to compare with.
    797         * @return true if this object is equivalent to the other one.
    798         * @stable ICU 4.8
    799         */
    800        bool operator==(const Part &other) const;
    801 
    802        /**
    803         * @param other another object to compare with.
    804         * @return false if this object is equivalent to the other one.
    805         * @stable ICU 4.8
    806         */
    807        inline bool operator!=(const Part &other) const {
    808            return !operator==(other);
    809        }
    810 
    811        /**
    812         * @return A hash code for this object.
    813         * @stable ICU 4.8
    814         */
    815        int32_t hashCode() const {
    816            return ((type*37+index)*37+length)*37+value;
    817        }
    818 
    819    private:
    820        friend class MessagePattern;
    821 
    822        static const int32_t MAX_LENGTH=0xffff;
    823        static const int32_t MAX_VALUE=0x7fff;
    824        static const int32_t MAX_NESTED_LEVELS=0x03ff;
    825 
    826        // Some fields are not final because they are modified during pattern parsing.
    827        // After pattern parsing, the parts are effectively immutable.
    828        UMessagePatternPartType type;
    829        int32_t index;
    830        uint16_t length;
    831        int16_t value;
    832        int32_t limitPartIndex;
    833    };
    834 
    835 private:
    // Private parsing steps. Only the declarations are visible here.
    // NOTE(review): the parse* functions that take and return an int32_t index
    // presumably return the pattern index just past the construct they parsed --
    // confirm against the implementation file.
    836    void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
    837 
    838    void postParse();
    839 
    840    int32_t parseMessage(int32_t index, int32_t msgStartLength,
    841                         int32_t nestingLevel, UMessagePatternArgType parentType,
    842                         UParseError *parseError, UErrorCode &errorCode);
    843 
    844    int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
    845                     UParseError *parseError, UErrorCode &errorCode);
    846 
    847    int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
    848 
    849    int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
    850                             UParseError *parseError, UErrorCode &errorCode);
    851 
    852    int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
    853                                     UParseError *parseError, UErrorCode &errorCode);
    854 
    855    /**
    856     * Validates and parses an argument name or argument number string.
    857     * This internal method assumes that the input substring is a "pattern identifier".
     * @param s the string that contains the substring to examine.
     * @param start start index of the substring in s.
     * @param limit limit index of the substring in s.
    858     * @return &gt;=0 if the name is a valid number,
    859     *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
    860     *         ARG_NAME_NOT_VALID (-2) if it is neither.
    861     * @see #validateArgumentName(String)
    862     */
    863    static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
    864 
    // Convenience overload: forwards to the static parseArgNumber() with this
    // object's own pattern string (msg) and returns its result unchanged.
    865    int32_t parseArgNumber(int32_t start, int32_t limit) {
    866        return parseArgNumber(msg, start, limit);
    867    }
    868 
    869    /**
    870     * Parses a number from the specified message substring.
     * NOTE(review): this function returns void; the parsed number is presumably
     * recorded as a part of this pattern (see addPart()/addArgDoublePart()) --
     * confirm against the implementation.
    871     * @param start start index into the message string
    872     * @param limit limit index into the message string, must be start<limit
    873     * @param allowInfinity true if U+221E is allowed (for ChoiceFormat)
    874     * @param parseError presumably receives error details when parsing fails -- TODO confirm
    875     * @param errorCode presumably set to a failure code when parsing fails -- TODO confirm
    876     */
    877    void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
    878                     UParseError *parseError, UErrorCode &errorCode);
    879 
    880    // Java has package-private appendReducedApostrophes() here.
    881    // In C++, this is declared in the MessageImpl class.
    882 
    // Scanning helpers over the pattern string.
    // NOTE(review): each skip* function presumably returns the index of the first
    // character after the skipped text -- confirm against the implementation.
    883    int32_t skipWhiteSpace(int32_t index);
    884 
    885    int32_t skipIdentifier(int32_t index);
    886 
    887    /**
    888     * Skips a sequence of characters that could occur in a double value.
    889     * Does not fully parse or validate the value.
    890     */
    891    int32_t skipDouble(int32_t index);
    892 
    893    static UBool isArgTypeChar(UChar32 c);
    894 
    // NOTE(review): the following four predicates presumably test whether the
    // pattern text at index spells the corresponding argument-type keyword --
    // confirm against the implementation.
    895    UBool isChoice(int32_t index);
    896 
    897    UBool isPlural(int32_t index);
    898 
    899    UBool isSelect(int32_t index);
    900 
    901    UBool isOrdinal(int32_t index);
    902 
    903    /**
    904     * @return true if we are inside a MessageFormat (sub-)pattern,
    905     *         as opposed to inside a top-level choice/plural/select pattern.
    906     */
    907    UBool inMessageFormatPattern(int32_t nestingLevel);
    908 
    909    /**
    910     * @return true if we are in a MessageFormat sub-pattern
    911     *         of a top-level ChoiceFormat pattern.
    912     */
    913    UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
    914 
    // Helpers for recording parse results and managing storage.
    // NOTE(review): addPart() presumably appends a Part with the given type,
    // index, length and value; addLimitPart() additionally takes the index of
    // the matching start part, presumably to set that part's limitPartIndex --
    // confirm against the implementation.
    915    void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
    916                 int32_t value, UErrorCode &errorCode);
    917 
    918    void addLimitPart(int32_t start,
    919                      UMessagePatternPartType type, int32_t index, int32_t length,
    920                      int32_t value, UErrorCode &errorCode);
    921 
    922    void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
    923 
    924    void setParseError(UParseError *parseError, int32_t index);
    925 
    // NOTE(review): init() and copyStorage() return UBool, presumably true on
    // success with errorCode set on failure -- confirm against the implementation.
    926    UBool init(UErrorCode &errorCode);
    927    UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
    928 
    // Apostrophe mode of this pattern (see UMessagePatternApostropheMode).
    929    UMessagePatternApostropheMode aposMode;
    // The pattern string; parseArgNumber(start, limit) parses substrings of it.
    930    UnicodeString msg;
    931    // ArrayList<Part> parts=new ArrayList<Part>();
    // NOTE(review): partsList presumably owns the Part storage, with parts and
    // partsLength giving direct access to it -- confirm against the implementation.
    932    MessagePatternPartsList *partsList;
    933    Part *parts;
    934    int32_t partsLength;
    935    // ArrayList<Double> numericValues;
    // NOTE(review): same arrangement as the parts storage above, but for the
    // double values of numeric parts -- confirm against the implementation.
    936    MessagePatternDoubleList *numericValuesList;
    937    double *numericValues;
    938    int32_t numericValuesLength;
    // Flags recorded for the pattern.
    // NOTE(review): presumably set during parsing -- confirm.
    939    UBool hasArgNames;
    940    UBool hasArgNumbers;
    941    UBool needsAutoQuoting;
    942 };
    943 
    944 U_NAMESPACE_END
    945 
    946 #endif  // !UCONFIG_NO_FORMATTING
    947 
    948 #endif /* U_SHOW_CPLUSPLUS_API */
    949 
    950 #endif  // __MESSAGEPATTERN_H__