tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

selfmt.h (14691B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /********************************************************************
      4 * COPYRIGHT:
      5 * Copyright (c) 1997-2011, International Business Machines Corporation and
      6 * others. All Rights Reserved.
      7 * Copyright (C) 2010 , Yahoo! Inc.
      8 ********************************************************************
      9 *
     10 * File SELFMT.H
     11 *
     12 * Modification History:
     13 *
     14 *   Date        Name        Description
     15 *   11/11/09    kirtig      Finished first cut of implementation.
     16 ********************************************************************/
     17 
     18 #ifndef SELFMT
     19 #define SELFMT
     20 
     21 #include "unicode/utypes.h"
     22 
     23 #if U_SHOW_CPLUSPLUS_API
     24 
     25 #include "unicode/messagepattern.h"
     26 #include "unicode/numfmt.h"
     27 
     28 /**
     29 * \file
     30 * \brief C++ API: SelectFormat object
     31 */
     32 
     33 #if !UCONFIG_NO_FORMATTING
     34 
     35 U_NAMESPACE_BEGIN
     36 
     37 class MessageFormat;
     38 
     39 /**
     40  * <p><code>SelectFormat</code> supports the creation of  internationalized
     41  * messages by selecting phrases based on keywords. The pattern  specifies
     42  * how to map keywords to phrases and provides a default phrase. The
     43  * object provided to the format method is a string that's matched
     44  * against the keywords. If there is a match, the corresponding phrase
     45  * is selected; otherwise, the default phrase is used.</p>
     46  *
     47  * <h4>Using <code>SelectFormat</code> for Gender Agreement</h4>
     48  *
     49  * <p>Note: Typically, select formatting is done via <code>MessageFormat</code>
     50  * with a <code>select</code> argument type,
     51  * rather than using a stand-alone <code>SelectFormat</code>.</p>
     52  *
     53  * <p>The main use case for the select format is gender based  inflection.
     54  * When names or nouns are inserted into sentences, their gender can  affect pronouns,
     55  * verb forms, articles, and adjectives. Special care needs to be
     56  * taken for the case where the gender cannot be determined.
     57  * The impact varies between languages:</p>
     58  * \htmlonly
     59  * <ul>
     60  * <li>English has three genders, and unknown gender is handled as a  special
     61  * case. Names use the gender of the named person (if known), nouns  referring
     62  * to people use natural gender, and inanimate objects are usually  neutral.
     63  * The gender only affects pronouns: "he", "she", "it", "they".
     64  *
     65  * <li>German differs from English in that the gender of nouns is  rather
     66  * arbitrary, even for nouns referring to people ("M&#x00E4;dchen", girl, is  neutral).
     67  * The gender affects pronouns ("er", "sie", "es"), articles ("der",  "die",
     68  * "das"), and adjective forms ("guter Mann", "gute Frau", "gutes  M&#x00E4;dchen").
     69  *
     70  * <li>French has only two genders; as in German the gender of nouns
     71  * is rather arbitrary - for sun and moon, the genders
     72  * are the opposite of those in German. The gender affects
     73  * pronouns ("il", "elle"), articles ("le", "la"),
     74  * adjective forms ("bon", "bonne"), and sometimes
     75  * verb forms ("all&#x00E9;", "all&#x00E9;e").
     76  *
     77  * <li>Polish distinguishes five genders (or noun classes),
     78  * human masculine, animate non-human masculine, inanimate masculine,
     79  * feminine, and neuter.
     80  * </ul>
     81  * \endhtmlonly
     82  * <p>Some other languages have noun classes that are not related to  gender,
     83  * but similar in grammatical use.
     84  * Some African languages have around 20 noun classes.</p>
     85  *
     86  * <p><b>Note:</b> For the gender of a <i>person</i> in a given sentence,
     87  * we usually need to distinguish only between female, male and other/unknown.</p>
     88  *
     89  * <p>To enable localizers to create sentence patterns that take their
     90  * language's gender dependencies into consideration, software has to  provide
     91  * information about the gender associated with a noun or name to
     92  * <code>MessageFormat</code>.
     93  * Two main cases can be distinguished:</p>
     94  *
     95  * <ul>
     96  * <li>For people, natural gender information should be maintained  for each person.
     97  * Keywords like "male", "female", "mixed" (for groups of people)
     98  * and "unknown" could be used.
     99  *
    100  * <li>For nouns, grammatical gender information should be maintained  for
    101  * each noun and per language, e.g., in resource bundles.
    102  * The keywords "masculine", "feminine", and "neuter" are commonly  used,
    103  * but some languages may require other keywords.
    104  * </ul>
    105  *
    106  * <p>The resulting keyword is provided to <code>MessageFormat</code>  as a
    107  * parameter separate from the name or noun it's associated with. For  example,
    108  * to generate a message such as "Jean went to Paris", three separate  arguments
    109  * would be provided: The name of the person as argument 0, the  gender of
    110  * the person as argument 1, and the name of the city as argument 2.
    111  * The sentence pattern for English, where the gender of the person has
    112  * no impact on this simple sentence, would not refer to argument 1  at all:</p>
    113  *
    114  * <pre>{0} went to {2}.</pre>
    115  *
    116  * <p><b>Note:</b> The entire sentence should be included (and partially repeated)
    117  * inside each phrase. Otherwise translators would have to be trained on how to
    118  * move bits of the sentence in and out of the select argument of a message.
    119  * (The examples below do not follow this recommendation!)</p>
    120  *
    121  * <p>The sentence pattern for French, where the gender of the person affects
    122  * the form of the participle, uses a select format based on argument 1:</p>
    123  *
    124  * \htmlonly<pre>{0} est {1, select, female {all&#x00E9;e} other {all&#x00E9;}} &#x00E0; {2}.</pre>\endhtmlonly
    125  *
    126  * <p>Patterns can be nested, so that it's possible to handle  interactions of
    127  * number and gender where necessary. For example, if the above  sentence should
    128  * allow for the names of several people to be inserted, the  following sentence
    129  * pattern can be used (with argument 0 the list of people's names,
    130  * argument 1 the number of people, argument 2 their combined gender, and
    131  * argument 3 the city name):</p>
    132  *
    133  * \htmlonly
    134  * <pre>{0} {1, plural,
    135  *                 one {est {2, select, female {all&#x00E9;e} other  {all&#x00E9;}}}
    136  *                 other {sont {2, select, female {all&#x00E9;es} other {all&#x00E9;s}}}
    137  *          }&#x00E0; {3}.</pre>
    138  * \endhtmlonly
    139  *
    140  * <h4>Patterns and Their Interpretation</h4>
    141  *
    142  * <p>The <code>SelectFormat</code> pattern string defines the phrase output
    143  * for each user-defined keyword.
    144  * The pattern is a sequence of (keyword, message) pairs.
    145  * A keyword is a "pattern identifier": [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+</p>
    146  *
    147  * <p>Each message is a MessageFormat pattern string enclosed in {curly braces}.</p>
    148  *
    149  * <p>You always have to define a phrase for the default keyword
    150  * <code>other</code>; this phrase is returned when the keyword
    151  * provided to
    152  * the <code>format</code> method matches no other keyword.
    153  * If a pattern does not provide a phrase for <code>other</code>, the  method
    154  * it's provided to returns the error  <code>U_DEFAULT_KEYWORD_MISSING</code>.
    155  * <br>
    156  * Pattern_White_Space between keywords and messages is ignored.
    157  * Pattern_White_Space within a message is preserved and output.</p>
    158  *
    159  * <p><pre>Example:
    160  * \htmlonly
    161  *
    162  * UErrorCode status = U_ZERO_ERROR;
    163  * MessageFormat *msgFmt = new MessageFormat(UnicodeString("{0} est  {1, select, female {all&#x00E9;e} other {all&#x00E9;}} &#x00E0; Paris."), Locale("fr"),  status);
    164  * if (U_FAILURE(status)) {
    165  *       return;
    166  * }
    167  * FieldPosition ignore(FieldPosition::DONT_CARE);
    168  * UnicodeString result;
    169  *
    170  * const char* str1 = "Kirti,female";
    171  * Formattable args1[] = {"Kirti","female"};
    172  * msgFmt->format(args1, 2, result, ignore, status);
    173  * cout << "Input is " << str1 << " and result is: " << result << endl;
    174  * delete msgFmt;
    175  *
    176  * \endhtmlonly
    177  * </pre>
    178  * </p>
    179  *
    180  * Produces the output:<br>
    181  * \htmlonly
    182  * <code>Kirti est all&#x00E9;e &#x00E0; Paris.</code>
    183  * \endhtmlonly
    184  *
    185  * @stable ICU 4.4
    186  */
    187 
    188 class U_I18N_API SelectFormat : public Format {
    189 public:
    190 
    191    /**
    192     * Creates a new <code>SelectFormat</code> for a given pattern string.
    193     * @param  pattern the pattern for this <code>SelectFormat</code>.
    194     *                 Errors are returned to status if the pattern is invalid.
    195     * @param status   output param set to success/failure code on exit, which
    196     *                 must not indicate a failure before the function call.
    197     * @stable ICU 4.4
    198     */
    199    SelectFormat(const UnicodeString& pattern, UErrorCode& status);
    200 
    201    /**
    202     * Copy constructor.
    203     * @stable ICU 4.4
    204     */
    205    SelectFormat(const SelectFormat& other);
    206 
    207    /**
    208     * Destructor.
    209     * @stable ICU 4.4
    210     */
    211    virtual ~SelectFormat();
    212 
    213    /**
    214     * Sets the pattern used by this select format
    215     * for the keyword rules.
    216     * Patterns and their interpretation are specified in the class description.
    217     *
    218     * @param pattern the pattern for this select format.
    219     *                Errors are returned to status if the pattern is invalid.
    220     * @param status  output param set to success/failure code on exit, which
    221     *                must not indicate a failure before the function call.
    222     * @stable ICU 4.4
    223     */
    224    void applyPattern(const UnicodeString& pattern, UErrorCode& status);
    225 
    226 
    227    using Format::format;
    228 
    229    /**
    230     * Selects the phrase for the given keyword.
    231     *
    232     * @param keyword  The keyword that is used to select an alternative.
    233     * @param appendTo output parameter to receive result.
    234     *                 result is appended to existing contents.
    235     * @param pos      On input: an alignment field, if desired.
    236     *                 On output: the offsets of the alignment field.
    237     * @param status   output param set to success/failure code on exit, which
    238     *                 must not indicate a failure before the function call.
    239     * @return         Reference to 'appendTo' parameter.
    240     * @stable ICU 4.4
    241     */
    242    UnicodeString& format(const UnicodeString& keyword,
    243                            UnicodeString& appendTo,
    244                            FieldPosition& pos,
    245                            UErrorCode& status) const;
    246 
    247    /**
    248     * Assignment operator
    249     *
    250     * @param other    the SelectFormat object to copy from.
    251     * @stable ICU 4.4
    252     */
    253    SelectFormat& operator=(const SelectFormat& other);
    254 
    255    /**
    256     * Return true if another object is semantically equal to this one.
    257     *
    258     * @param other    the SelectFormat object to be compared with.
    259     * @return         true if other is semantically equal to this.
    260     * @stable ICU 4.4
    261     */
    262    virtual bool operator==(const Format& other) const override;
    263 
    264    /**
    265     * Return true if another object is semantically unequal to this one.
    266     *
    267     * @param other    the SelectFormat object to be compared with.
    268     * @return         true if other is semantically unequal to this.
    269     * @stable ICU 4.4
    270     */
    271    virtual bool operator!=(const Format& other) const;
    272 
    273    /**
    274     * Clones this Format object polymorphically.  The caller owns the
    275     * result and should delete it when done.
    276     * @stable ICU 4.4
    277     */
    278    virtual SelectFormat* clone() const override;
    279 
    280    /**
    281     * Format an object to produce a string.
    282     * This method handles keyword strings.
    283     * If the Formattable object is not a <code>UnicodeString</code>,
    284     * then it returns a failing UErrorCode.
    285     *
    286     * @param obj       A keyword string that is used to select an alternative.
    287     * @param appendTo  output parameter to receive result.
    288     *                  Result is appended to existing contents.
    289     * @param pos       On input: an alignment field, if desired.
    290     *                  On output: the offsets of the alignment field.
    291     * @param status    output param filled with success/failure status.
    292     * @return          Reference to 'appendTo' parameter.
    293     * @stable ICU 4.4
    294     */
    295    UnicodeString& format(const Formattable& obj,
    296                         UnicodeString& appendTo,
    297                         FieldPosition& pos,
    298                         UErrorCode& status) const override;
    299 
    300    /**
    301     * Returns the pattern from applyPattern() or constructor.
    302     *
    303     * @param  appendTo  output parameter to receive result.
    304     *                  Result is appended to existing contents.
    305     * @return the UnicodeString with inserted pattern.
    306     * @stable ICU 4.4
    307     */
    308    UnicodeString& toPattern(UnicodeString& appendTo);
    309 
    310    /**
    311     * This method is not yet supported by <code>SelectFormat</code>.
    312     * <P>
    313     * Before calling, set parse_pos.index to the offset you want to start
    314     * parsing at in the source. After calling, parse_pos.index is the end of
    315     * the text you parsed. If error occurs, index is unchanged.
    316     * <P>
    317     * When parsing, leading whitespace is discarded (with a successful parse),
    318     * while trailing whitespace is left as is.
    319     * <P>
    320     * See Format::parseObject() for more.
    321     *
    322     * @param source     The string to be parsed into an object.
    323     * @param result     Formattable to be set to the parse result.
    324     *     If parse fails, return contents are undefined.
    325     * @param parse_pos The position to start parsing at. Upon return
    326     *     this param is set to the position after the
    327     *     last character successfully parsed. If the
    328     *     source is not parsed successfully, this param
    329     *     will remain unchanged.
    330     * @stable ICU 4.4
    331     */
    332    virtual void parseObject(const UnicodeString& source,
    333                            Formattable& result,
    334                            ParsePosition& parse_pos) const override;
    335 
    336    /**
    337     * ICU "poor man's RTTI", returns a UClassID for this class.
    338     * @stable ICU 4.4
    339     */
    340    static UClassID U_EXPORT2 getStaticClassID();
    341 
    342    /**
    343     * ICU "poor man's RTTI", returns a UClassID for the actual class.
    344     * @stable ICU 4.4
    345     */
    346    virtual UClassID getDynamicClassID() const override;
    347 
    348 private:
    349    friend class MessageFormat;
    350 
    351    SelectFormat() = delete;   // default constructor is deleted.
    352 
    353    /**
    354     * Finds the SelectFormat sub-message for the given keyword, or the "other" sub-message.
    355     * @param pattern A MessagePattern.
    356     * @param partIndex the index of the first SelectFormat argument style part.
    357     * @param keyword a keyword to be matched to one of the SelectFormat argument's keywords.
    358     * @param ec Error code.
    359     * @return the sub-message start part index.
    360     */
    361    static int32_t findSubMessage(const MessagePattern& pattern, int32_t partIndex,
    362                                  const UnicodeString& keyword, UErrorCode& ec);
    363 
    364    MessagePattern msgPattern;
    365 };
    366 
    367 U_NAMESPACE_END
    368 
    369 #endif /* #if !UCONFIG_NO_FORMATTING */
    370 
    371 #endif /* U_SHOW_CPLUSPLUS_API */
    372 
    373 #endif // SELFMT
    374 //eof