messageformat2.h (22106B)
1 // © 2024 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #ifndef MESSAGEFORMAT2_H 7 #define MESSAGEFORMAT2_H 8 9 #if U_SHOW_CPLUSPLUS_API 10 11 #if !UCONFIG_NO_NORMALIZATION 12 13 #if !UCONFIG_NO_FORMATTING 14 15 #if !UCONFIG_NO_MF2 16 17 /** 18 * \file 19 * \brief C++ API: Formats messages using the draft MessageFormat 2.0. 20 */ 21 22 #include "unicode/messageformat2_arguments.h" 23 #include "unicode/messageformat2_data_model.h" 24 #include "unicode/messageformat2_function_registry.h" 25 #include "unicode/normalizer2.h" 26 #include "unicode/unistr.h" 27 28 #ifndef U_HIDE_DEPRECATED_API 29 30 U_NAMESPACE_BEGIN 31 32 namespace message2 { 33 34 class Environment; 35 class MessageContext; 36 class StaticErrors; 37 class InternalValue; 38 39 /** 40 * <p>MessageFormatter is a Technical Preview API implementing MessageFormat 2.0. 41 * 42 * <p>See <a target="github" href="https://github.com/unicode-org/message-format-wg/blob/main/spec/syntax.md">the 43 * description of the syntax with examples and use cases</a> and the corresponding 44 * <a target="github" href="https://github.com/unicode-org/message-format-wg/blob/main/spec/message.abnf">ABNF</a> grammar.</p> 45 * 46 * The MessageFormatter class is mutable and movable. It is not copyable. 47 * (It is mutable because if it has a custom function registry, the registry may include 48 * `FormatterFactory` objects implementing custom formatters, which are allowed to contain 49 * mutable state.) 50 * 51 * @internal ICU 75 technology preview 52 * @deprecated This API is for technology preview only. 53 */ 54 class U_I18N_API_CLASS MessageFormatter : public UObject { 55 // Note: This class does not currently inherit from the existing 56 // `Format` class. 57 public: 58 /** 59 * Move assignment operator: 60 * The source MessageFormatter will be left in a valid but undefined state. 61 * 62 * @internal ICU 75 technology preview 63 * @deprecated This API is for technology preview only. 64 */ 65 U_I18N_API MessageFormatter& operator=(MessageFormatter&&) noexcept; 66 /** 67 * Destructor. 68 * 69 * @internal ICU 75 technology preview 70 * @deprecated This API is for technology preview only. 71 */ 72 U_I18N_API virtual ~MessageFormatter(); 73 74 /** 75 * Formats the message to a string, using the data model that was previously set or parsed, 76 * and the given `arguments` object. 77 * 78 * @param arguments Reference to message arguments 79 * @param status Input/output error code used to indicate syntax errors, data model 80 * errors, resolution errors, formatting errors, selection errors, as well 81 * as other errors (such as memory allocation failures). Partial output 82 * is still provided in the presence of most error types. 83 * @return The string result of formatting the message with the given arguments. 84 * 85 * @internal ICU 75 technology preview 86 * @deprecated This API is for technology preview only. 87 */ 88 U_I18N_API UnicodeString formatToString(const MessageArguments& arguments, UErrorCode& status); 89 90 /** 91 * Not yet implemented; formats the message to a `FormattedMessage` object, 92 * using the data model that was previously set or parsed, 93 * and the given `arguments` object. 94 * 95 * @param arguments Reference to message arguments 96 * @param status Input/output error code used to indicate syntax errors, data model 97 * errors, resolution errors, formatting errors, selection errors, as well 98 * as other errors (such as memory allocation failures). Partial output 99 * is still provided in the presence of most error types. 100 * @return The `FormattedMessage` representing the formatted message. 101 * 102 * @internal ICU 75 technology preview 103 * @deprecated This API is for technology preview only. 104 */ 105 U_I18N_API FormattedMessage format(const MessageArguments& arguments, UErrorCode& status) const { 106 (void) arguments; 107 if (U_SUCCESS(status)) { 108 status = U_UNSUPPORTED_ERROR; 109 } 110 return FormattedMessage(status); 111 } 112 113 /** 114 * Accesses the locale that this `MessageFormatter` object was created with. 115 * 116 * @return A reference to the locale. 117 * 118 * @internal ICU 75 technology preview 119 * @deprecated This API is for technology preview only. 120 */ 121 U_I18N_API const Locale& getLocale() const { return locale; } 122 123 /** 124 * Serializes the data model as a string in MessageFormat 2.0 syntax. 125 * 126 * @return result A string representation of the data model. 127 * The string is a valid MessageFormat 2.0 message. 128 * 129 * @internal ICU 75 technology preview 130 * @deprecated This API is for technology preview only. 131 */ 132 U_I18N_API UnicodeString getPattern() const; 133 134 /** 135 * Accesses the data model referred to by this 136 * `MessageFormatter` object. 137 * 138 * @return A reference to the data model. 139 * 140 * @internal ICU 75 technology preview 141 * @deprecated This API is for technology preview only. 142 */ 143 U_I18N_API const MFDataModel& getDataModel() const; 144 145 /** 146 * Used in conjunction with the 147 * MessageFormatter::Builder::setErrorHandlingBehavior() method. 148 * 149 * @internal ICU 76 technology preview 150 * @deprecated This API is for technology preview only. 151 */ 152 typedef enum UMFErrorHandlingBehavior { 153 /** 154 * Suppress errors and return best-effort output. 155 * 156 * @internal ICU 76 technology preview 157 * @deprecated This API is for technology preview only. 158 */ 159 U_MF_BEST_EFFORT = 0, 160 /** 161 * Signal all MessageFormat errors using the UErrorCode 162 * argument. 163 * 164 * @internal ICU 76 technology preview 165 * @deprecated This API is for technology preview only. 166 */ 167 U_MF_STRICT 168 } UMFErrorHandlingBehavior; 169 170 /** 171 * The mutable Builder class allows each part of the MessageFormatter to be initialized 172 * separately; calling its `build()` method yields an immutable MessageFormatter. 173 * 174 * Not copyable or movable. 175 */ 176 class U_I18N_API_CLASS Builder : public UObject { 177 private: 178 friend class MessageFormatter; 179 180 // The pattern to be parsed to generate the formatted message 181 UnicodeString pattern; 182 bool hasPattern = false; 183 bool hasDataModel = false; 184 // The data model to be used to generate the formatted message 185 // Initialized either by `setDataModel()`, or by the parser 186 // through a call to `setPattern()` 187 MFDataModel dataModel; 188 // Normalized representation of the pattern; 189 // ignored if `setPattern()` wasn't called 190 UnicodeString normalizedInput; 191 // Errors (internal representation of parse errors) 192 // Ignored if `setPattern()` wasn't called 193 StaticErrors* errors; 194 Locale locale; 195 // Not owned 196 const MFFunctionRegistry* customMFFunctionRegistry; 197 // Error behavior; see comment in `MessageFormatter` class 198 bool signalErrors = false; 199 200 void clearState(); 201 public: 202 /** 203 * Sets the locale to use for formatting. 204 * 205 * @param locale The desired locale. 206 * @return A reference to the builder. 207 * 208 * @internal ICU 75 technology preview 209 * @deprecated This API is for technology preview only. 210 */ 211 U_I18N_API Builder& setLocale(const Locale& locale); 212 /** 213 * Sets the pattern (contents of the message) and parses it 214 * into a data model. If a data model was 215 * previously set, it is removed. 216 * 217 * @param pattern A string in MessageFormat 2.0 syntax. 218 * @param parseError Struct to receive information on the position 219 * of an error within the pattern. 220 * @param status Input/output error code. If the 221 * pattern cannot be parsed, set to failure code. 222 * @return A reference to the builder. 223 * 224 * @internal ICU 75 technology preview 225 * @deprecated This API is for technology preview only. 226 */ 227 U_I18N_API Builder& setPattern(const UnicodeString& pattern, 228 UParseError& parseError, 229 UErrorCode& status); 230 /** 231 * Sets a custom function registry. 232 * 233 * @param functionRegistry Reference to the function registry to use. 234 * `functionRegistry` is not copied, 235 * and the caller must ensure its lifetime contains 236 * the lifetime of the `MessageFormatter` object built by this 237 * builder. 238 * @return A reference to the builder. 239 * 240 * @internal ICU 75 technology preview 241 * @deprecated This API is for technology preview only. 242 */ 243 U_I18N_API Builder& setFunctionRegistry(const MFFunctionRegistry& functionRegistry); 244 /** 245 * Sets a data model. If a pattern was previously set, it is removed. 246 * 247 * @param dataModel Data model to format. Passed by move. 248 * @return A reference to the builder. 249 * 250 * @internal ICU 75 technology preview 251 * @deprecated This API is for technology preview only. 252 */ 253 U_I18N_API Builder& setDataModel(MFDataModel&& dataModel); 254 /** 255 * Set the error handling behavior for this formatter. 256 * 257 * "Strict" error behavior means that that formatting methods 258 * will set their UErrorCode arguments to signal MessageFormat 259 * data model, resolution, and runtime errors. Syntax errors are 260 * always signaled. 261 * 262 * "Best effort" error behavior means that MessageFormat errors are 263 * suppressed: formatting methods will _not_ set their 264 * UErrorCode arguments to signal MessageFormat data model, 265 * resolution, or runtime errors. Best-effort output 266 * will be returned. Syntax errors are always signaled. 267 * This is the default behavior. 268 * 269 * @param type An enum with type UMFErrorHandlingBehavior; 270 * if type == `U_MF_STRICT`, then 271 * errors are handled strictly. 272 * If type == `U_MF_BEST_EFFORT`, then 273 * best-effort output is returned. 274 * 275 * The default is to suppress all MessageFormat errors 276 * and return best-effort output. 277 * 278 * @return A reference to the builder. 279 * 280 * @internal ICU 76 technology preview 281 * @deprecated This API is for technology preview only. 282 */ 283 U_I18N_API Builder& setErrorHandlingBehavior(UMFErrorHandlingBehavior type); 284 /** 285 * Constructs a new immutable MessageFormatter using the pattern or data model 286 * that was previously set, and the locale (if it was previously set) 287 * or default locale (otherwise). 288 * 289 * The builder object (`this`) can still be used after calling `build()`. 290 * 291 * @param status Input/output error code. If neither the pattern 292 * nor the data model is set, set to failure code. 293 * @return The new MessageFormatter object 294 * 295 * @internal ICU 75 technology preview 296 * @deprecated This API is for technology preview only. 297 */ 298 U_I18N_API MessageFormatter build(UErrorCode& status) const; 299 /** 300 * Default constructor. 301 * Returns a Builder with the default locale and with no 302 * data model or pattern set. Either `setPattern()` 303 * or `setDataModel()` has to be called before calling `build()`. 304 * 305 * @param status Input/output error code. 306 * 307 * @internal ICU 75 technology preview 308 * @deprecated This API is for technology preview only. 309 */ 310 U_I18N_API Builder(UErrorCode& status); 311 /** 312 * Destructor. 313 * 314 * @internal ICU 75 technology preview 315 * @deprecated This API is for technology preview only. 316 */ 317 U_I18N_API virtual ~Builder(); 318 }; // class MessageFormatter::Builder 319 320 // TODO: Shouldn't be public; only used for testing 321 /** 322 * Returns a string consisting of the input with optional spaces removed. 323 * 324 * @return A normalized string representation of the input 325 * 326 * @internal ICU 75 technology preview 327 * @deprecated This API is for technology preview only. 328 */ 329 U_I18N_API const UnicodeString& getNormalizedPattern() const { return normalizedInput; } 330 331 private: 332 friend class Builder; 333 friend class Checker; 334 friend class MessageArguments; 335 friend class MessageContext; 336 337 MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &status); 338 339 MessageFormatter() = delete; // default constructor not implemented 340 341 // Do not define default assignment operator 342 const MessageFormatter &operator=(const MessageFormatter &) = delete; 343 344 // Selection methods 345 346 // Takes a vector of FormattedPlaceholders 347 void resolveSelectors(MessageContext&, const Environment& env, UErrorCode&, UVector&) const; 348 // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (output) 349 void filterVariants(const UVector&, UVector&, UErrorCode&) const; 350 // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (input/output) 351 void sortVariants(const UVector&, UVector&, UErrorCode&) const; 352 // Takes a vector of strings (input) and a vector of strings (output) 353 void matchSelectorKeys(const UVector&, MessageContext&, InternalValue* rv, UVector&, UErrorCode&) const; 354 // Takes a vector of FormattedPlaceholders (input), 355 // and a vector of vectors of strings (output) 356 void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const; 357 358 // Formatting methods 359 360 [[nodiscard]] FormattedPlaceholder formatLiteral(const UnicodeString&, const data_model::Literal&) const; 361 void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const; 362 // Evaluates a function call 363 // Dispatches on argument type 364 [[nodiscard]] InternalValue* evalFunctionCall(FormattedPlaceholder&& argument, 365 MessageContext& context, 366 UErrorCode& status) const; 367 // Dispatches on function name 368 [[nodiscard]] InternalValue* evalFunctionCall(const FunctionName& functionName, 369 InternalValue* argument, 370 FunctionOptions&& options, 371 MessageContext& context, 372 UErrorCode& status) const; 373 // Formats an expression that appears in a pattern or as the definition of a local variable 374 [[nodiscard]] InternalValue* formatExpression(const UnicodeString&, 375 const Environment&, 376 const data_model::Expression&, 377 MessageContext&, 378 UErrorCode&) const; 379 [[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const; 380 [[nodiscard]] InternalValue* formatOperand(const UnicodeString&, 381 const Environment&, 382 const data_model::Operand&, 383 MessageContext&, 384 UErrorCode&) const; 385 [[nodiscard]] FormattedPlaceholder evalArgument(const UnicodeString&, 386 const data_model::VariableName&, 387 MessageContext&, 388 UErrorCode&) const; 389 void formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const; 390 391 // Function registry methods 392 bool hasCustomMFFunctionRegistry() const { 393 return (customMFFunctionRegistry != nullptr); 394 } 395 396 // Precondition: custom function registry exists 397 // Note: this is non-const because the values in the MFFunctionRegistry are mutable 398 // (a FormatterFactory can have mutable state) 399 const MFFunctionRegistry& getCustomMFFunctionRegistry() const; 400 401 bool isCustomFormatter(const FunctionName&) const; 402 FormatterFactory* lookupFormatterFactory(const FunctionName&, UErrorCode& status) const; 403 bool isBuiltInSelector(const FunctionName&) const; 404 bool isBuiltInFormatter(const FunctionName&) const; 405 bool isCustomSelector(const FunctionName&) const; 406 const SelectorFactory* lookupSelectorFactory(MessageContext&, const FunctionName&, UErrorCode&) const; 407 bool isSelector(const FunctionName& fn) const { return isBuiltInSelector(fn) || isCustomSelector(fn); } 408 bool isFormatter(const FunctionName& fn) const { return isBuiltInFormatter(fn) || isCustomFormatter(fn); } 409 const Formatter* lookupFormatter(const FunctionName&, UErrorCode&) const; 410 411 Selector* getSelector(MessageContext&, const FunctionName&, UErrorCode&) const; 412 Formatter* getFormatter(const FunctionName&, UErrorCode&) const; 413 bool getDefaultFormatterNameByType(const UnicodeString&, FunctionName&) const; 414 415 // Checking for resolution errors 416 void checkDeclarations(MessageContext&, Environment*&, UErrorCode&) const; 417 void check(MessageContext&, const Environment&, const data_model::Expression&, UErrorCode&) const; 418 void check(MessageContext&, const Environment&, const data_model::Operand&, UErrorCode&) const; 419 void check(MessageContext&, const Environment&, const OptionMap&, UErrorCode&) const; 420 421 void initErrors(UErrorCode&); 422 void clearErrors() const; 423 void cleanup() noexcept; 424 425 // The locale this MessageFormatter was created with 426 /* const */ Locale locale; 427 428 // Registry for built-in functions 429 MFFunctionRegistry standardMFFunctionRegistry; 430 // Registry for custom functions; may be null if no custom registry supplied 431 // Note: this is *not* owned by the MessageFormatter object 432 // The reason for this choice is to have a non-destructive MessageFormatter::Builder, 433 // while also not requiring the function registry to be deeply-copyable. Making the 434 // function registry copyable would impose a requirement on any implementations 435 // of the FormatterFactory and SelectorFactory interfaces to implement a custom 436 // clone() method, which is necessary to avoid sharing between copies of the 437 // function registry (and thus double-frees) 438 // Not deeply immutable (the values in the function registry are mutable, 439 // as a FormatterFactory can have mutable state 440 const MFFunctionRegistry* customMFFunctionRegistry; 441 442 // Data model, representing the parsed message 443 MFDataModel dataModel; 444 445 // Normalized version of the input string (optional whitespace removed) 446 UnicodeString normalizedInput; 447 448 // Errors -- only used while parsing and checking for data model errors; then 449 // the MessageContext keeps track of errors 450 // Must be a raw pointer to avoid including the internal header file 451 // defining StaticErrors 452 // Owned by `this` 453 StaticErrors* errors = nullptr; 454 455 // Error handling behavior. 456 // If true, then formatting methods set their UErrorCode arguments 457 // to signal MessageFormat errors, and no useful output is returned. 458 // If false, then MessageFormat errors are not signaled and the 459 // formatting methods return best-effort output. 460 // The default is false. 461 bool signalErrors = false; 462 463 }; // class MessageFormatter 464 465 } // namespace message2 466 467 U_NAMESPACE_END 468 469 #endif // U_HIDE_DEPRECATED_API 470 471 #endif /* #if !UCONFIG_NO_MF2 */ 472 473 #endif /* #if !UCONFIG_NO_FORMATTING */ 474 475 #endif /* #if !UCONFIG_NO_NORMALIZATION */ 476 477 #endif /* U_SHOW_CPLUSPLUS_API */ 478 479 #endif // MESSAGEFORMAT2_H 480 481 // eof