tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

messageformat2.h (22106B)


      1 // © 2024 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #ifndef MESSAGEFORMAT2_H
      7 #define MESSAGEFORMAT2_H
      8 
      9 #if U_SHOW_CPLUSPLUS_API
     10 
     11 #if !UCONFIG_NO_NORMALIZATION
     12 
     13 #if !UCONFIG_NO_FORMATTING
     14 
     15 #if !UCONFIG_NO_MF2
     16 
     17 /**
     18 * \file
     19 * \brief C++ API: Formats messages using the draft MessageFormat 2.0.
     20 */
     21 
     22 #include "unicode/messageformat2_arguments.h"
     23 #include "unicode/messageformat2_data_model.h"
     24 #include "unicode/messageformat2_function_registry.h"
     25 #include "unicode/normalizer2.h"
     26 #include "unicode/unistr.h"
     27 
     28 #ifndef U_HIDE_DEPRECATED_API
     29 
     30 U_NAMESPACE_BEGIN
     31 
     32 namespace message2 {
     33 
     34    class Environment;     // Forward declarations: these types are used below
     35    class MessageContext;  // only through references or pointers, so their
     36    class StaticErrors;    // full definitions are not needed in this header.
     37    class InternalValue;
     38 
     39    /**
     40     * <p>MessageFormatter is a Technical Preview API implementing MessageFormat 2.0.</p>
     41     *
     42     * <p>See <a target="github" href="https://github.com/unicode-org/message-format-wg/blob/main/spec/syntax.md">the
     43     * description of the syntax with examples and use cases</a> and the corresponding
     44     * <a target="github" href="https://github.com/unicode-org/message-format-wg/blob/main/spec/message.abnf">ABNF</a> grammar.</p>
     45     *
     46     * The MessageFormatter class is mutable and movable. It is not copyable.
     47     * (It is mutable because if it has a custom function registry, the registry may include
     48     * `FormatterFactory` objects implementing custom formatters, which are allowed to contain
     49     * mutable state.)
     50     *
     51     * @internal ICU 75 technology preview
     52     * @deprecated This API is for technology preview only.
     53     */
     54    class U_I18N_API_CLASS MessageFormatter : public UObject {
     55        // Note: This class does not currently inherit from the existing
     56        // `Format` class.
     57    public:
     58        /**
     59         * Move assignment operator:
     60         * The source MessageFormatter will be left in a valid but undefined state.
     61         *
     62         * @internal ICU 75 technology preview
     63         * @deprecated This API is for technology preview only.
     64         */
     65        U_I18N_API MessageFormatter& operator=(MessageFormatter&&) noexcept;
     66        /**
     67         * Destructor.
     68         *
     69         * @internal ICU 75 technology preview
     70         * @deprecated This API is for technology preview only.
     71         */
     72        U_I18N_API virtual ~MessageFormatter();
     73 
     74        /**
     75         * Formats the message to a string, using the data model that was previously set or parsed,
     76         * and the given `arguments` object.
     77         *
     78         * @param arguments Reference to message arguments
     79         * @param status    Input/output error code used to indicate syntax errors, data model
     80         *                  errors, resolution errors, formatting errors, selection errors, as well
     81         *                  as other errors (such as memory allocation failures). Partial output
     82         *                  is still provided in the presence of most error types.
     83         * @return          The string result of formatting the message with the given arguments.
     84         *
     85         * @internal ICU 75 technology preview
     86         * @deprecated This API is for technology preview only.
     87         */
     88        U_I18N_API UnicodeString formatToString(const MessageArguments& arguments, UErrorCode& status);
     89 
     90        /**
     91         * Not yet implemented (currently only reports U_UNSUPPORTED_ERROR); will format the
     92         * message to a `FormattedMessage` object, using the data model that was previously
     93         * set or parsed, and the given `arguments` object.
     94         *
     95         * @param arguments Reference to message arguments
     96         * @param status    Input/output error code used to indicate syntax errors, data model
     97         *                  errors, resolution errors, formatting errors, selection errors, as well
     98         *                  as other errors (such as memory allocation failures). Partial output
     99         *                  is still provided in the presence of most error types.
    100         * @return          The `FormattedMessage` representing the formatted message.
    101         *
    102         * @internal ICU 75 technology preview
    103         * @deprecated This API is for technology preview only.
    104         */
    105        U_I18N_API FormattedMessage format(const MessageArguments& arguments, UErrorCode& status) const {
    106            (void) arguments; // unused until this method is implemented
    107            if (U_SUCCESS(status)) { // keep any failure code that was passed in
    108                status = U_UNSUPPORTED_ERROR;
    109            }
    110            return FormattedMessage(status);
    111        }
    112 
    113        /**
    114         * Accesses the locale that this `MessageFormatter` object was created with.
    115         *
    116         * @return A reference to the locale.
    117         *
    118         * @internal ICU 75 technology preview
    119         * @deprecated This API is for technology preview only.
    120         */
    121        U_I18N_API const Locale& getLocale() const { return locale; }
    122 
    123        /**
    124         * Serializes the data model as a string in MessageFormat 2.0 syntax.
    125         *
    126         * @return           A string representation of the data model.
    127         *                   The string is a valid MessageFormat 2.0 message.
    128         *
    129         * @internal ICU 75 technology preview
    130         * @deprecated This API is for technology preview only.
    131         */
    132        U_I18N_API UnicodeString getPattern() const;
    133 
    134        /**
    135         * Accesses the data model referred to by this
    136         * `MessageFormatter` object.
    137         *
    138         * @return A reference to the data model.
    139         *
    140         * @internal ICU 75 technology preview
    141         * @deprecated This API is for technology preview only.
    142         */
    143        U_I18N_API const MFDataModel& getDataModel() const;
    144 
    145        /**
    146         * Used in conjunction with the
    147         * MessageFormatter::Builder::setErrorHandlingBehavior() method.
    148         *
    149         * @internal ICU 76 technology preview
    150         * @deprecated This API is for technology preview only.
    151         */
    152        typedef enum UMFErrorHandlingBehavior {
    153            /**
    154             * Suppress errors and return best-effort output.
    155             *
    156             * @internal ICU 76 technology preview
    157             * @deprecated This API is for technology preview only.
    158             */
    159            U_MF_BEST_EFFORT = 0,
    160            /**
    161             * Signal all MessageFormat errors using the UErrorCode
    162             * argument.
    163             *
    164             * @internal ICU 76 technology preview
    165             * @deprecated This API is for technology preview only.
    166             */
    167            U_MF_STRICT
    168        } UMFErrorHandlingBehavior;
    169 
    170        /**
    171         * The mutable Builder class allows each part of the MessageFormatter to be initialized
    172         * separately; calling its `build()` method yields an immutable MessageFormatter.
    173         *
    174         * Not copyable or movable.
    175         */
    176        class U_I18N_API_CLASS Builder : public UObject { // NOTE(review): documented as not copyable/movable, but no copy or move operations are declared or deleted in this class -- confirm
    177        private:
    178            friend class MessageFormatter;
    179 
    180            // The pattern to be parsed to generate the formatted message
    181            UnicodeString pattern;
    182            bool hasPattern = false;
    183            bool hasDataModel = false;
    184            // The data model to be used to generate the formatted message
    185            // Initialized either by `setDataModel()`, or by the parser
    186            // through a call to `setPattern()`
    187            MFDataModel dataModel;
    188            // Normalized representation of the pattern;
    189            // ignored if `setPattern()` wasn't called
    190            UnicodeString normalizedInput;
    191            // Errors (internal representation of parse errors)
    192            // Ignored if `setPattern()` wasn't called
    193            StaticErrors* errors;
    194            Locale locale;
    195            // Not owned
    196            const MFFunctionRegistry* customMFFunctionRegistry;
    197            // Error behavior; see comment in `MessageFormatter` class
    198            bool signalErrors = false;
    199 
    200            void clearState();
    201        public:
    202            /**
    203             * Sets the locale to use for formatting.
    204             *
    205             * @param locale The desired locale.
    206             * @return       A reference to the builder.
    207             *
    208             * @internal ICU 75 technology preview
    209             * @deprecated This API is for technology preview only.
    210             */
    211            U_I18N_API Builder& setLocale(const Locale& locale);
    212            /**
    213             * Sets the pattern (contents of the message) and parses it
    214             * into a data model. If a data model was
    215             * previously set, it is removed.
    216             *
    217             * @param pattern A string in MessageFormat 2.0 syntax.
    218             * @param parseError Struct to receive information on the position
    219             *                   of an error within the pattern.
    220             * @param status    Input/output error code. If the
    221             *                  pattern cannot be parsed, set to failure code.
    222             * @return       A reference to the builder.
    223             *
    224             * @internal ICU 75 technology preview
    225             * @deprecated This API is for technology preview only.
    226             */
    227            U_I18N_API Builder& setPattern(const UnicodeString& pattern,
    228                                           UParseError& parseError,
    229                                           UErrorCode& status);
    230            /**
    231             * Sets a custom function registry.
    232             *
    233             * @param functionRegistry Reference to the function registry to use.
    234             *        `functionRegistry` is not copied,
    235             *        and the caller must ensure its lifetime contains
    236             *        the lifetime of the `MessageFormatter` object built by this
    237             *        builder.
    238             * @return       A reference to the builder.
    239             *
    240             * @internal ICU 75 technology preview
    241             * @deprecated This API is for technology preview only.
    242             */
    243            U_I18N_API Builder& setFunctionRegistry(const MFFunctionRegistry& functionRegistry);
    244            /**
    245             * Sets a data model. If a pattern was previously set, it is removed.
    246             *
    247             * @param dataModel Data model to format. Passed by move.
    248             * @return       A reference to the builder.
    249             *
    250             * @internal ICU 75 technology preview
    251             * @deprecated This API is for technology preview only.
    252             */
    253            U_I18N_API Builder& setDataModel(MFDataModel&& dataModel);
    254            /**
    255             * Set the error handling behavior for this formatter.
    256             *
    257             * "Strict" error behavior means that formatting methods
    258             * will set their UErrorCode arguments to signal MessageFormat
    259             * data model, resolution, and runtime errors. Syntax errors are
    260             * always signaled.
    261             *
    262             * "Best effort" error behavior means that MessageFormat errors are
    263             * suppressed:  formatting methods will _not_ set their
    264             * UErrorCode arguments to signal MessageFormat data model,
    265             * resolution, or runtime errors. Best-effort output
    266             * will be returned. Syntax errors are always signaled.
    267             * This is the default behavior.
    268             *
    269             * @param type An enum with type UMFErrorHandlingBehavior;
    270             *             if type == `U_MF_STRICT`, then
    271             *             errors are handled strictly.
    272             *             If type == `U_MF_BEST_EFFORT`, then
    273             *             best-effort output is returned.
    274             *
    275             * The default is to suppress all MessageFormat errors
    276             * and return best-effort output.
    277             *
    278             * @return       A reference to the builder.
    279             *
    280             * @internal ICU 76 technology preview
    281             * @deprecated This API is for technology preview only.
    282             */
    283            U_I18N_API Builder& setErrorHandlingBehavior(UMFErrorHandlingBehavior type);
    284            /**
    285             * Constructs a new immutable MessageFormatter using the pattern or data model
    286             * that was previously set, and the locale (if it was previously set)
    287             * or default locale (otherwise).
    288             *
    289             * The builder object (`this`) can still be used after calling `build()`.
    290             *
    291             * @param status    Input/output error code.  If neither the pattern
    292             *                  nor the data model is set, set to failure code.
    293             * @return          The new MessageFormatter object
    294             *
    295             * @internal ICU 75 technology preview
    296             * @deprecated This API is for technology preview only.
    297             */
    298            U_I18N_API MessageFormatter build(UErrorCode& status) const;
    299            /**
    300             * Default constructor.
    301             * Returns a Builder with the default locale and with no
    302             * data model or pattern set. Either `setPattern()`
    303             * or `setDataModel()` has to be called before calling `build()`.
    304             *
    305             * @param status    Input/output error code.
    306             *
    307             * @internal ICU 75 technology preview
    308             * @deprecated This API is for technology preview only.
    309             */
    310            U_I18N_API Builder(UErrorCode& status);
    311            /**
    312             * Destructor.
    313             *
    314             * @internal ICU 75 technology preview
    315             * @deprecated This API is for technology preview only.
    316             */
    317            U_I18N_API virtual ~Builder();
    318        }; // class MessageFormatter::Builder
    319 
    320        // TODO: Shouldn't be public; only used for testing
    321        /**
    322         * Returns a string consisting of the input with optional spaces removed.
    323         *
    324         * @return        A normalized string representation of the input
    325         *
    326         * @internal ICU 75 technology preview
    327         * @deprecated This API is for technology preview only.
    328         */
    329        U_I18N_API const UnicodeString& getNormalizedPattern() const { return normalizedInput; }
    330 
    331    private:
    332        friend class Builder;
    333        friend class Checker;
    334        friend class MessageArguments;
    335        friend class MessageContext;
    336 
    337        MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &status);
    338 
    339        MessageFormatter() = delete; // default constructor not implemented
    340 
    341        // Copy assignment is deleted; only move assignment (declared above) is provided
    342        const MessageFormatter &operator=(const MessageFormatter &) = delete;
    343 
    344        // Selection methods
    345 
    346        // Takes a vector of FormattedPlaceholders
    347        void resolveSelectors(MessageContext&, const Environment& env, UErrorCode&, UVector&) const;
    348        // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (output)
    349        void filterVariants(const UVector&, UVector&, UErrorCode&) const;
    350        // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (input/output)
    351        void sortVariants(const UVector&, UVector&, UErrorCode&) const;
    352        // Takes a vector of strings (input) and a vector of strings (output)
    353        void matchSelectorKeys(const UVector&, MessageContext&, InternalValue* rv, UVector&, UErrorCode&) const;
    354        // Takes a vector of FormattedPlaceholders (input),
    355        // and a vector of vectors of strings (output)
    356        void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const;
    357 
    358        // Formatting methods
    359 
    360        [[nodiscard]] FormattedPlaceholder formatLiteral(const UnicodeString&, const data_model::Literal&) const;
    361        void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const;
    362        // Evaluates a function call
    363        // Dispatches on argument type
    364        [[nodiscard]] InternalValue* evalFunctionCall(FormattedPlaceholder&& argument,
    365                                                     MessageContext& context,
    366                                                     UErrorCode& status) const;
    367        // Dispatches on function name
    368        [[nodiscard]] InternalValue* evalFunctionCall(const FunctionName& functionName,
    369                                                     InternalValue* argument,
    370                                                     FunctionOptions&& options,
    371                                                     MessageContext& context,
    372                                                     UErrorCode& status) const;
    373        // Formats an expression that appears in a pattern or as the definition of a local variable
    374        [[nodiscard]] InternalValue* formatExpression(const UnicodeString&,
    375                                                      const Environment&,
    376                                                      const data_model::Expression&,
    377                                                      MessageContext&,
    378                                                      UErrorCode&) const;
    379        [[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const;
    380        [[nodiscard]] InternalValue* formatOperand(const UnicodeString&,
    381                                                   const Environment&,
    382                                                   const data_model::Operand&,
    383                                                   MessageContext&,
    384                                                   UErrorCode&) const;
    385        [[nodiscard]] FormattedPlaceholder evalArgument(const UnicodeString&,
    386                                                        const data_model::VariableName&,
    387                                                        MessageContext&,
    388                                                        UErrorCode&) const;
    389        void formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const;
    390 
    391        // Function registry methods
    392        bool hasCustomMFFunctionRegistry() const {
    393            return (customMFFunctionRegistry != nullptr);
    394        }
    395 
    396        // Precondition: custom function registry exists
    397        // Note: the registry is returned as const, but the values in the MFFunctionRegistry
    398        // are mutable (a FormatterFactory can have mutable state)
    399        const MFFunctionRegistry& getCustomMFFunctionRegistry() const;
    400 
    401        bool isCustomFormatter(const FunctionName&) const;
    402        FormatterFactory* lookupFormatterFactory(const FunctionName&, UErrorCode& status) const;
    403        bool isBuiltInSelector(const FunctionName&) const;
    404        bool isBuiltInFormatter(const FunctionName&) const;
    405        bool isCustomSelector(const FunctionName&) const;
    406        const SelectorFactory* lookupSelectorFactory(MessageContext&, const FunctionName&, UErrorCode&) const;
    407        bool isSelector(const FunctionName& fn) const { return isBuiltInSelector(fn) || isCustomSelector(fn); }
    408        bool isFormatter(const FunctionName& fn) const { return isBuiltInFormatter(fn) || isCustomFormatter(fn); }
    409        const Formatter* lookupFormatter(const FunctionName&, UErrorCode&) const;
    410 
    411        Selector* getSelector(MessageContext&, const FunctionName&, UErrorCode&) const;
    412        Formatter* getFormatter(const FunctionName&, UErrorCode&) const;
    413        bool getDefaultFormatterNameByType(const UnicodeString&, FunctionName&) const;
    414 
    415        // Checking for resolution errors
    416        void checkDeclarations(MessageContext&, Environment*&, UErrorCode&) const;
    417        void check(MessageContext&, const Environment&, const data_model::Expression&, UErrorCode&) const;
    418        void check(MessageContext&, const Environment&, const data_model::Operand&, UErrorCode&) const;
    419        void check(MessageContext&, const Environment&, const OptionMap&, UErrorCode&) const;
    420 
    421        void initErrors(UErrorCode&);
    422        void clearErrors() const;
    423        void cleanup() noexcept;
    424 
    425        // The locale this MessageFormatter was created with
    426        /* const */ Locale locale;
    427 
    428        // Registry for built-in functions
    429        MFFunctionRegistry standardMFFunctionRegistry;
    430        // Registry for custom functions; may be null if no custom registry supplied
    431        // Note: this is *not* owned by the MessageFormatter object
    432        // The reason for this choice is to have a non-destructive MessageFormatter::Builder,
    433        // while also not requiring the function registry to be deeply-copyable. Making the
    434        // function registry copyable would impose a requirement on any implementations
    435        // of the FormatterFactory and SelectorFactory interfaces to implement a custom
    436        // clone() method, which is necessary to avoid sharing between copies of the
    437        // function registry (and thus double-frees)
    438        // Not deeply immutable (the values in the function registry are mutable,
    439        // as a FormatterFactory can have mutable state)
    440        const MFFunctionRegistry* customMFFunctionRegistry;
    441 
    442        // Data model, representing the parsed message
    443        MFDataModel dataModel;
    444 
    445        // Normalized version of the input string (optional whitespace removed)
    446        UnicodeString normalizedInput;
    447 
    448        // Errors -- only used while parsing and checking for data model errors; then
    449        // the MessageContext keeps track of errors
    450        // Must be a raw pointer to avoid including the internal header file
    451        // defining StaticErrors
    452        // Owned by `this`
    453        StaticErrors* errors = nullptr;
    454 
    455        // Error handling behavior.
    456        // If true, then formatting methods set their UErrorCode arguments
    457        // to signal MessageFormat errors, and no useful output is returned.
    458        // If false, then MessageFormat errors are not signaled and the
    459        // formatting methods return best-effort output.
    460        // The default is false.
    461        bool signalErrors = false;
    462 
    463    }; // class MessageFormatter
    464 
    465 } // namespace message2
    466 
    467 U_NAMESPACE_END
    468 
    469 #endif // U_HIDE_DEPRECATED_API
    470 
    471 #endif /* #if !UCONFIG_NO_MF2 */
    472 
    473 #endif /* #if !UCONFIG_NO_FORMATTING */
    474 
    475 #endif /* #if !UCONFIG_NO_NORMALIZATION */
    476 
    477 #endif /* U_SHOW_CPLUSPLUS_API */
    478 
    479 #endif // MESSAGEFORMAT2_H
    480 
    481 // eof