messageformat2.cpp (33768B)
1 // © 2024 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_NORMALIZATION 7 8 #if !UCONFIG_NO_FORMATTING 9 10 #if !UCONFIG_NO_MF2 11 12 #include "unicode/messageformat2_arguments.h" 13 #include "unicode/messageformat2_data_model.h" 14 #include "unicode/messageformat2_formattable.h" 15 #include "unicode/messageformat2.h" 16 #include "unicode/normalizer2.h" 17 #include "unicode/unistr.h" 18 #include "messageformat2_allocation.h" 19 #include "messageformat2_checker.h" 20 #include "messageformat2_evaluation.h" 21 #include "messageformat2_function_registry_internal.h" 22 #include "messageformat2_macros.h" 23 24 25 U_NAMESPACE_BEGIN 26 27 namespace message2 { 28 29 using namespace data_model; 30 31 // ------------------------------------------------------ 32 // Formatting 33 34 // The result of formatting a literal is just itself. 35 static Formattable evalLiteral(const Literal& lit) { 36 return Formattable(lit.unquoted()); 37 } 38 39 // Assumes that `var` is a message argument; returns the argument's value. 40 [[nodiscard]] FormattedPlaceholder MessageFormatter::evalArgument(const UnicodeString& fallback, 41 const VariableName& var, 42 MessageContext& context, 43 UErrorCode& errorCode) const { 44 if (U_SUCCESS(errorCode)) { 45 const Formattable* val = context.getGlobal(var, errorCode); 46 if (U_SUCCESS(errorCode)) { 47 // Note: the fallback string has to be passed in because in a declaration like: 48 // .local $foo = {$bar :number} 49 // the fallback for $bar is "$foo". 50 UnicodeString fallbackToUse = fallback; 51 if (fallbackToUse.isEmpty()) { 52 fallbackToUse += DOLLAR; 53 fallbackToUse += var; 54 } 55 return (FormattedPlaceholder(*val, fallbackToUse)); 56 } 57 } 58 return {}; 59 } 60 61 // Helper function to re-escape any escaped-char characters 62 static UnicodeString reserialize(const UnicodeString& s) { 63 UnicodeString result(PIPE); 64 for (int32_t i = 0; i < s.length(); i++) { 65 switch(s[i]) { 66 case BACKSLASH: 67 case PIPE: 68 case LEFT_CURLY_BRACE: 69 case RIGHT_CURLY_BRACE: { 70 result += BACKSLASH; 71 break; 72 } 73 default: 74 break; 75 } 76 result += s[i]; 77 } 78 result += PIPE; 79 return result; 80 } 81 82 // Returns the contents of the literal 83 [[nodiscard]] FormattedPlaceholder MessageFormatter::formatLiteral(const UnicodeString& fallback, 84 const Literal& lit) const { 85 // The fallback for a literal is itself, unless another fallback is passed in 86 // (same reasoning as evalArgument()) 87 UnicodeString fallbackToUse = fallback.isEmpty() ? reserialize(lit.unquoted()) : fallback; 88 return FormattedPlaceholder(evalLiteral(lit), fallbackToUse); 89 } 90 91 [[nodiscard]] InternalValue* MessageFormatter::formatOperand(const UnicodeString& fallback, 92 const Environment& env, 93 const Operand& rand, 94 MessageContext& context, 95 UErrorCode &status) const { 96 if (U_FAILURE(status)) { 97 return {}; 98 } 99 100 if (rand.isNull()) { 101 return create<InternalValue>(InternalValue(FormattedPlaceholder()), status); 102 } 103 if (rand.isVariable()) { 104 // Check if it's local or global 105 // Note: there is no name shadowing; this is enforced by the parser 106 const VariableName& var = rand.asVariable(); 107 // TODO: Currently, this code implements lazy evaluation of locals. 108 // That is, the environment binds names to a closure, not a resolved value. 109 // Eager vs. lazy evaluation is an open issue: 110 // see https://github.com/unicode-org/message-format-wg/issues/299 111 112 // NFC-normalize the variable name. See 113 // https://github.com/unicode-org/message-format-wg/blob/main/spec/syntax.md#names-and-identifiers 114 const VariableName normalized = StandardFunctions::normalizeNFC(var); 115 116 // Look up the variable in the environment 117 if (env.has(normalized)) { 118 // `var` is a local -- look it up 119 const Closure& rhs = env.lookup(normalized); 120 // Format the expression using the environment from the closure 121 // The name of this local variable is the fallback for its RHS. 122 UnicodeString newFallback(DOLLAR); 123 newFallback += var; 124 return formatExpression(newFallback, rhs.getEnv(), rhs.getExpr(), context, status); 125 } 126 // Variable wasn't found in locals -- check if it's global 127 FormattedPlaceholder result = evalArgument(fallback, normalized, context, status); 128 if (status == U_ILLEGAL_ARGUMENT_ERROR) { 129 status = U_ZERO_ERROR; 130 // Unbound variable -- set a resolution error 131 context.getErrors().setUnresolvedVariable(var, status); 132 // Use fallback per 133 // https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution 134 UnicodeString str(DOLLAR); 135 str += var; 136 return create<InternalValue>(InternalValue(FormattedPlaceholder(str)), status); 137 } 138 return create<InternalValue>(InternalValue(std::move(result)), status); 139 } else { 140 U_ASSERT(rand.isLiteral()); 141 return create<InternalValue>(InternalValue(formatLiteral(fallback, rand.asLiteral())), status); 142 } 143 } 144 145 // Resolves a function's options 146 FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const OptionMap& options, MessageContext& context, UErrorCode& status) const { 147 LocalPointer<UVector> optionsVector(createUVector(status)); 148 if (U_FAILURE(status)) { 149 return {}; 150 } 151 LocalPointer<ResolvedFunctionOption> resolvedOpt; 152 for (int i = 0; i < options.size(); i++) { 153 const Option& opt = options.getOption(i, status); 154 if (U_FAILURE(status)) { 155 return {}; 156 } 157 const UnicodeString& k = opt.getName(); 158 const Operand& v = opt.getValue(); 159 160 // Options are fully evaluated before calling the function 161 // Format the operand 162 LocalPointer<InternalValue> rhsVal(formatOperand({}, env, v, context, status)); 163 if (U_FAILURE(status)) { 164 return {}; 165 } 166 // Note: this means option values are "eagerly" evaluated. 167 // Currently, options don't have options. This will be addressed by the 168 // full FormattedPlaceholder redesign. 169 FormattedPlaceholder optValue = rhsVal->forceFormatting(context.getErrors(), status); 170 resolvedOpt.adoptInstead(create<ResolvedFunctionOption> 171 (ResolvedFunctionOption(k, 172 optValue.asFormattable(), 173 v.isLiteral()), 174 status)); 175 if (U_FAILURE(status)) { 176 return {}; 177 } 178 optionsVector->adoptElement(resolvedOpt.orphan(), status); 179 } 180 return FunctionOptions(std::move(*optionsVector), status); 181 } 182 183 // Overload that dispatches on argument type. Syntax doesn't provide for options in this case. 184 [[nodiscard]] InternalValue* MessageFormatter::evalFunctionCall(FormattedPlaceholder&& argument, 185 MessageContext& context, 186 UErrorCode& status) const { 187 if (U_FAILURE(status)) { 188 return nullptr; 189 } 190 191 // These cases should have been checked for already 192 U_ASSERT(!argument.isFallback() && !argument.isNullOperand()); 193 194 const Formattable& toFormat = argument.asFormattable(); 195 switch (toFormat.getType()) { 196 case UFMT_OBJECT: { 197 const FormattableObject* obj = toFormat.getObject(status); 198 U_ASSERT(U_SUCCESS(status)); 199 U_ASSERT(obj != nullptr); 200 const UnicodeString& type = obj->tag(); 201 FunctionName functionName; 202 if (!getDefaultFormatterNameByType(type, functionName)) { 203 // No formatter for this type -- follow default behavior 204 break; 205 } 206 return evalFunctionCall(functionName, 207 create<InternalValue>(std::move(argument), status), 208 FunctionOptions(), 209 context, 210 status); 211 } 212 default: { 213 // TODO: The array case isn't handled yet; not sure whether it's desirable 214 // to have a default list formatter 215 break; 216 } 217 } 218 // No formatter for this type, or it's a primitive type (which will be formatted later) 219 // -- just return the argument itself 220 return create<InternalValue>(std::move(argument), status); 221 } 222 223 // Overload that dispatches on function name 224 // Adopts `arg` 225 [[nodiscard]] InternalValue* MessageFormatter::evalFunctionCall(const FunctionName& functionName, 226 InternalValue* arg_, 227 FunctionOptions&& options, 228 MessageContext& context, 229 UErrorCode& status) const { 230 if (U_FAILURE(status)) { 231 return {}; 232 } 233 234 LocalPointer<InternalValue> arg(arg_); 235 236 // Look up the formatter or selector 237 LocalPointer<Formatter> formatterImpl(nullptr); 238 LocalPointer<Selector> selectorImpl(nullptr); 239 if (isFormatter(functionName)) { 240 formatterImpl.adoptInstead(getFormatter(functionName, status)); 241 U_ASSERT(U_SUCCESS(status)); 242 } 243 if (isSelector(functionName)) { 244 selectorImpl.adoptInstead(getSelector(context, functionName, status)); 245 U_ASSERT(U_SUCCESS(status)); 246 } 247 if (formatterImpl == nullptr && selectorImpl == nullptr) { 248 // Unknown function error 249 context.getErrors().setUnknownFunction(functionName, status); 250 251 if (arg->hasNullOperand()) { 252 // Non-selector used as selector; an error would have been recorded earlier 253 UnicodeString fallback(COLON); 254 fallback += functionName; 255 return new InternalValue(FormattedPlaceholder(fallback)); 256 } else { 257 return new InternalValue(FormattedPlaceholder(arg->getFallback())); 258 } 259 } 260 return new InternalValue(arg.orphan(), 261 std::move(options), 262 functionName, 263 formatterImpl.isValid() ? formatterImpl.orphan() : nullptr, 264 selectorImpl.isValid() ? selectorImpl.orphan() : nullptr); 265 } 266 267 // Formats an expression using `globalEnv` for the values of variables 268 [[nodiscard]] InternalValue* MessageFormatter::formatExpression(const UnicodeString& fallback, 269 const Environment& globalEnv, 270 const Expression& expr, 271 MessageContext& context, 272 UErrorCode &status) const { 273 if (U_FAILURE(status)) { 274 return {}; 275 } 276 277 const Operand& rand = expr.getOperand(); 278 // Format the operand (formatOperand handles the case of a null operand) 279 LocalPointer<InternalValue> randVal(formatOperand(fallback, globalEnv, rand, context, status)); 280 281 FormattedPlaceholder maybeRand = randVal->takeArgument(status); 282 283 if (!expr.isFunctionCall() && U_SUCCESS(status)) { 284 // Dispatch based on type of `randVal` 285 if (maybeRand.isFallback()) { 286 return randVal.orphan(); 287 } 288 return evalFunctionCall(std::move(maybeRand), context, status); 289 } else if (expr.isFunctionCall()) { 290 status = U_ZERO_ERROR; 291 const Operator* rator = expr.getOperator(status); 292 U_ASSERT(U_SUCCESS(status)); 293 const FunctionName& functionName = rator->getFunctionName(); 294 const OptionMap& options = rator->getOptionsInternal(); 295 // Resolve the options 296 FunctionOptions resolvedOptions = resolveOptions(globalEnv, options, context, status); 297 298 // Call the formatter function 299 return evalFunctionCall(functionName, 300 randVal.orphan(), 301 std::move(resolvedOptions), 302 context, 303 status); 304 } else { 305 status = U_ZERO_ERROR; 306 return randVal.orphan(); 307 } 308 } 309 310 // Formats each text and expression part of a pattern, appending the results to `result` 311 void MessageFormatter::formatPattern(MessageContext& context, const Environment& globalEnv, const Pattern& pat, UErrorCode &status, UnicodeString& result) const { 312 CHECK_ERROR(status); 313 314 for (int32_t i = 0; i < pat.numParts(); i++) { 315 const PatternPart& part = pat.getPart(i); 316 if (part.isText()) { 317 result += part.asText(); 318 } else if (part.isMarkup()) { 319 // Markup is ignored 320 } else { 321 // Format the expression 322 LocalPointer<InternalValue> partVal( 323 formatExpression({}, globalEnv, part.contents(), context, status)); 324 FormattedPlaceholder partResult = partVal->forceFormatting(context.getErrors(), 325 status); 326 // Force full evaluation, e.g. applying default formatters to 327 // unformatted input (or formatting numbers as strings) 328 result += partResult.formatToString(locale, status); 329 // Handle formatting errors. `formatToString()` can't take a context and thus can't 330 // register an error directly 331 if (status == U_MF_FORMATTING_ERROR) { 332 status = U_ZERO_ERROR; 333 // TODO: The name of the formatter that failed is unavailable. 334 // Not ideal, but it's hard for `formatToString()` 335 // to pass along more detailed diagnostics 336 context.getErrors().setFormattingError(status); 337 } 338 } 339 } 340 } 341 342 // ------------------------------------------------------ 343 // Selection 344 345 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-selectors 346 // `res` is a vector of ResolvedSelectors 347 void MessageFormatter::resolveSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UVector& res) const { 348 CHECK_ERROR(status); 349 U_ASSERT(!dataModel.hasPattern()); 350 351 const VariableName* selectors = dataModel.getSelectorsInternal(); 352 // 1. Let res be a new empty list of resolved values that support selection. 353 // (Implicit, since `res` is an out-parameter) 354 // 2. For each expression exp of the message's selectors 355 for (int32_t i = 0; i < dataModel.numSelectors(); i++) { 356 // 2i. Let rv be the resolved value of exp. 357 LocalPointer<InternalValue> rv(formatOperand({}, env, Operand(selectors[i]), context, status)); 358 if (rv->canSelect()) { 359 // 2ii. If selection is supported for rv: 360 // (True if this code has been reached) 361 } else { 362 // 2iii. Else: 363 // Let nomatch be a resolved value for which selection always fails. 364 // Append nomatch as the last element of the list res. 365 // Emit a Selection Error. 366 // (Note: in this case, rv, being a fallback, serves as `nomatch`) 367 DynamicErrors& err = context.getErrors(); 368 err.setSelectorError(rv->getFunctionName(), status); 369 rv.adoptInstead(new InternalValue(FormattedPlaceholder(rv->getFallback()))); 370 if (!rv.isValid()) { 371 status = U_MEMORY_ALLOCATION_ERROR; 372 return; 373 } 374 } 375 // 2ii(a). Append rv as the last element of the list res. 376 // (Also fulfills 2iii) 377 res.adoptElement(rv.orphan(), status); 378 } 379 } 380 381 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-preferences 382 // `keys` and `matches` are vectors of strings 383 void MessageFormatter::matchSelectorKeys(const UVector& keys, 384 MessageContext& context, 385 InternalValue* rv, // Does not adopt `rv` 386 UVector& keysOut, 387 UErrorCode& status) const { 388 CHECK_ERROR(status); 389 390 if (U_FAILURE(status)) { 391 // Return an empty list of matches 392 status = U_ZERO_ERROR; 393 return; 394 } 395 396 UErrorCode savedStatus = status; 397 398 // Convert `keys` to an array 399 int32_t keysLen = keys.size(); 400 UnicodeString* keysArr = new UnicodeString[keysLen]; 401 if (keysArr == nullptr) { 402 status = U_MEMORY_ALLOCATION_ERROR; 403 return; 404 } 405 for (int32_t i = 0; i < keysLen; i++) { 406 const UnicodeString* k = static_cast<UnicodeString*>(keys[i]); 407 U_ASSERT(k != nullptr); 408 keysArr[i] = *k; 409 } 410 LocalArray<UnicodeString> adoptedKeys(keysArr); 411 412 // Create an array to hold the output 413 UnicodeString* prefsArr = new UnicodeString[keysLen]; 414 if (prefsArr == nullptr) { 415 status = U_MEMORY_ALLOCATION_ERROR; 416 return; 417 } 418 LocalArray<UnicodeString> adoptedPrefs(prefsArr); 419 int32_t prefsLen = 0; 420 421 // Call the selector 422 FunctionName name = rv->getFunctionName(); 423 rv->forceSelection(context.getErrors(), 424 adoptedKeys.getAlias(), keysLen, 425 adoptedPrefs.getAlias(), prefsLen, 426 status); 427 428 // Update errors 429 if (savedStatus != status) { 430 if (U_FAILURE(status)) { 431 status = U_ZERO_ERROR; 432 context.getErrors().setSelectorError(name, status); 433 } else { 434 // Ignore warnings 435 status = savedStatus; 436 } 437 } 438 439 CHECK_ERROR(status); 440 441 // Copy the resulting keys (if there was no error) 442 keysOut.removeAllElements(); 443 for (int32_t i = 0; i < prefsLen; i++) { 444 UnicodeString* k = message2::create<UnicodeString>(std::move(prefsArr[i]), status); 445 if (k == nullptr) { 446 status = U_MEMORY_ALLOCATION_ERROR; 447 return; 448 } 449 keysOut.adoptElement(k, status); 450 CHECK_ERROR(status); 451 } 452 } 453 454 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-preferences 455 // `res` is a vector of FormattedPlaceholders; 456 // `pref` is a vector of vectors of strings 457 void MessageFormatter::resolvePreferences(MessageContext& context, UVector& res, UVector& pref, UErrorCode &status) const { 458 CHECK_ERROR(status); 459 460 // 1. Let pref be a new empty list of lists of strings. 461 // (Implicit, since `pref` is an out-parameter) 462 UnicodeString ks; 463 LocalPointer<UnicodeString> ksP; 464 int32_t numVariants = dataModel.numVariants(); 465 const Variant* variants = dataModel.getVariantsInternal(); 466 // 2. For each index i in res 467 for (int32_t i = 0; i < res.size(); i++) { 468 // 2i. Let keys be a new empty list of strings. 469 LocalPointer<UVector> keys(createUVector(status)); 470 CHECK_ERROR(status); 471 // 2ii. For each variant `var` of the message 472 for (int32_t variantNum = 0; variantNum < numVariants; variantNum++) { 473 const SelectorKeys& selectorKeys = variants[variantNum].getKeys(); 474 475 // Note: Here, `var` names the key list of `var`, 476 // not a Variant itself 477 const Key* var = selectorKeys.getKeysInternal(); 478 // 2ii(a). Let `key` be the `var` key at position i. 479 U_ASSERT(i < selectorKeys.len); // established by semantic check in formatSelectors() 480 const Key& key = var[i]; 481 // 2ii(b). If `key` is not the catch-all key '*' 482 if (!key.isWildcard()) { 483 // 2ii(b)(a) Assert that key is a literal. 484 // (Not needed) 485 // 2ii(b)(b) Let `ks` be the resolved value of `key` in Unicode Normalization Form C. 486 ks = StandardFunctions::normalizeNFC(key.asLiteral().unquoted()); 487 // 2ii(b)(c) Append `ks` as the last element of the list `keys`. 488 ksP.adoptInstead(create<UnicodeString>(std::move(ks), status)); 489 CHECK_ERROR(status); 490 keys->adoptElement(ksP.orphan(), status); 491 } 492 } 493 // 2iii. Let `rv` be the resolved value at index `i` of `res`. 494 U_ASSERT(i < res.size()); 495 InternalValue* rv = static_cast<InternalValue*>(res[i]); 496 // 2iv. Let matches be the result of calling the method MatchSelectorKeys(rv, keys) 497 LocalPointer<UVector> matches(createUVector(status)); 498 matchSelectorKeys(*keys, context, std::move(rv), *matches, status); 499 // 2v. Append `matches` as the last element of the list `pref` 500 pref.adoptElement(matches.orphan(), status); 501 } 502 } 503 504 // `v` is assumed to be a vector of strings 505 static int32_t vectorFind(const UVector& v, const UnicodeString& k) { 506 for (int32_t i = 0; i < v.size(); i++) { 507 if (*static_cast<UnicodeString*>(v[i]) == k) { 508 return i; 509 } 510 } 511 return -1; 512 } 513 514 static UBool vectorContains(const UVector& v, const UnicodeString& k) { 515 return (vectorFind(v, k) != -1); 516 } 517 518 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#filter-variants 519 // `pref` is a vector of vectors of strings. `vars` is a vector of PrioritizedVariants 520 void MessageFormatter::filterVariants(const UVector& pref, UVector& vars, UErrorCode& status) const { 521 const Variant* variants = dataModel.getVariantsInternal(); 522 523 // 1. Let `vars` be a new empty list of variants. 524 // (Not needed since `vars` is an out-parameter) 525 // 2. For each variant `var` of the message: 526 for (int32_t j = 0; j < dataModel.numVariants(); j++) { 527 const SelectorKeys& selectorKeys = variants[j].getKeys(); 528 const Pattern& p = variants[j].getPattern(); 529 530 // Note: Here, `var` names the key list of `var`, 531 // not a Variant itself 532 const Key* var = selectorKeys.getKeysInternal(); 533 // 2i. For each index `i` in `pref`: 534 bool noMatch = false; 535 for (int32_t i = 0; i < pref.size(); i++) { 536 // 2i(a). Let `key` be the `var` key at position `i`. 537 U_ASSERT(i < selectorKeys.len); 538 const Key& key = var[i]; 539 // 2i(b). If key is the catch-all key '*': 540 if (key.isWildcard()) { 541 // 2i(b)(a). Continue the inner loop on pref. 542 continue; 543 } 544 // 2i(c). Assert that `key` is a literal. 545 // (Not needed) 546 // 2i(d). Let `ks` be the resolved value of `key`. 547 UnicodeString ks = StandardFunctions::normalizeNFC(key.asLiteral().unquoted()); 548 // 2i(e). Let `matches` be the list of strings at index `i` of `pref`. 549 const UVector& matches = *(static_cast<UVector*>(pref[i])); // `matches` is a vector of strings 550 // 2i(f). If `matches` includes `ks` 551 if (vectorContains(matches, ks)) { 552 // 2i(f)(a). Continue the inner loop on `pref`. 553 continue; 554 } 555 // 2i(g). Else: 556 // 2i(g)(a). Continue the outer loop on message variants. 557 noMatch = true; 558 break; 559 } 560 if (!noMatch) { 561 // Append `var` as the last element of the list `vars`. 562 PrioritizedVariant* tuple = create<PrioritizedVariant>(PrioritizedVariant(-1, selectorKeys, p), status); 563 CHECK_ERROR(status); 564 vars.adoptElement(tuple, status); 565 } 566 } 567 } 568 569 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#sort-variants 570 // Leaves the preferred variant as element 0 in `sortable` 571 // Note: this sorts in-place, so `sortable` is just `vars` 572 // `pref` is a vector of vectors of strings; `vars` is a vector of PrioritizedVariants 573 void MessageFormatter::sortVariants(const UVector& pref, UVector& vars, UErrorCode& status) const { 574 CHECK_ERROR(status); 575 576 // Note: steps 1 and 2 are omitted since we use `vars` as `sortable` (we sort in-place) 577 // 1. Let `sortable` be a new empty list of (integer, variant) tuples. 578 // (Not needed since `sortable` is an out-parameter) 579 // 2. For each variant `var` of `vars` 580 // 2i. Let tuple be a new tuple (-1, var). 581 // 2ii. Append `tuple` as the last element of the list `sortable`. 582 583 // 3. Let `len` be the integer count of items in `pref`. 584 int32_t len = pref.size(); 585 // 4. Let `i` be `len` - 1. 586 int32_t i = len - 1; 587 // 5. While i >= 0: 588 while (i >= 0) { 589 // 5i. Let `matches` be the list of strings at index `i` of `pref`. 590 U_ASSERT(pref[i] != nullptr); 591 const UVector& matches = *(static_cast<UVector*>(pref[i])); // `matches` is a vector of strings 592 // 5ii. Let `minpref` be the integer count of items in `matches`. 593 int32_t minpref = matches.size(); 594 // 5iii. For each tuple `tuple` of `sortable`: 595 for (int32_t j = 0; j < vars.size(); j++) { 596 U_ASSERT(vars[j] != nullptr); 597 PrioritizedVariant& tuple = *(static_cast<PrioritizedVariant*>(vars[j])); 598 // 5iii(a). Let matchpref be an integer with the value minpref. 599 int32_t matchpref = minpref; 600 // 5iii(b). Let `key` be the tuple variant key at position `i`. 601 const Key* tupleVariantKeys = tuple.keys.getKeysInternal(); 602 U_ASSERT(i < tuple.keys.len); // Given by earlier semantic checking 603 const Key& key = tupleVariantKeys[i]; 604 // 5iii(c) If `key` is not the catch-all key '*': 605 if (!key.isWildcard()) { 606 // 5iii(c)(a). Assert that `key` is a literal. 607 // (Not needed) 608 // 5iii(c)(b). Let `ks` be the resolved value of `key`. 609 UnicodeString ks = StandardFunctions::normalizeNFC(key.asLiteral().unquoted()); 610 // 5iii(c)(c) Let matchpref be the integer position of ks in `matches`. 611 matchpref = vectorFind(matches, ks); 612 U_ASSERT(matchpref >= 0); 613 } 614 // 5iii(d) Set the `tuple` integer value as matchpref. 615 tuple.priority = matchpref; 616 } 617 // 5iv. Set `sortable` to be the result of calling the method SortVariants(`sortable`) 618 vars.sort(comparePrioritizedVariants, status); 619 CHECK_ERROR(status); 620 // 5v. Set `i` to be `i` - 1. 621 i--; 622 } 623 // The caller is responsible for steps 6 and 7 624 // 6. Let `var` be the `variant` element of the first element of `sortable`. 625 // 7. Select the pattern of `var` 626 } 627 628 void MessageFormatter::formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const { 629 CHECK_ERROR(status); 630 631 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection 632 633 // Resolve Selectors 634 // res is a vector of InternalValues 635 LocalPointer<UVector> res(createUVector(status)); 636 CHECK_ERROR(status); 637 resolveSelectors(context, env, status, *res); 638 639 // Resolve Preferences 640 // pref is a vector of vectors of strings 641 LocalPointer<UVector> pref(createUVector(status)); 642 CHECK_ERROR(status); 643 resolvePreferences(context, *res, *pref, status); 644 645 // Filter Variants 646 // vars is a vector of PrioritizedVariants 647 LocalPointer<UVector> vars(createUVector(status)); 648 CHECK_ERROR(status); 649 filterVariants(*pref, *vars, status); 650 651 // Sort Variants and select the final pattern 652 // Note: `sortable` in the spec is just `vars` here, 653 // which is sorted in-place 654 sortVariants(*pref, *vars, status); 655 656 CHECK_ERROR(status); 657 658 // 6. Let `var` be the `variant` element of the first element of `sortable`. 659 U_ASSERT(vars->size() > 0); // This should have been checked earlier (having 0 variants would be a data model error) 660 const PrioritizedVariant& var = *(static_cast<PrioritizedVariant*>(vars->elementAt(0))); 661 // 7. Select the pattern of `var` 662 const Pattern& pat = var.pat; 663 664 // Format the pattern 665 formatPattern(context, env, pat, status, result); 666 } 667 668 // Note: this is non-const due to the function registry being non-const, which is in turn 669 // due to the values (`FormatterFactory` objects in the map) having mutable state. 670 // In other words, formatting a message can mutate the underlying `MessageFormatter` by changing 671 // state within the factory objects that represent custom formatters. 672 UnicodeString MessageFormatter::formatToString(const MessageArguments& arguments, UErrorCode &status) { 673 EMPTY_ON_ERROR(status); 674 675 // Create a new context with the given arguments and the `errors` structure 676 MessageContext context(arguments, *errors, status); 677 UnicodeString result; 678 679 if (!(errors->hasSyntaxError() || errors->hasDataModelError())) { 680 // Create a new environment that will store closures for all local variables 681 // Check for unresolved variable errors 682 // checkDeclarations needs a reference to the pointer to the environment 683 // since it uses its `env` argument as an out-parameter. So it needs to be 684 // temporarily not a LocalPointer... 685 Environment* env(Environment::create(status)); 686 checkDeclarations(context, env, status); 687 // ...and then it's adopted to avoid leaks 688 LocalPointer<Environment> globalEnv(env); 689 690 if (dataModel.hasPattern()) { 691 formatPattern(context, *globalEnv, dataModel.getPattern(), status, result); 692 } else { 693 // Check for errors/warnings -- if so, then the result of pattern selection is the fallback value 694 // See https://www.unicode.org/reports/tr35/tr35-messageFormat.html#pattern-selection 695 const DynamicErrors& err = context.getErrors(); 696 if (err.hasSyntaxError() || err.hasDataModelError()) { 697 result += REPLACEMENT; 698 } else { 699 formatSelectors(context, *globalEnv, status, result); 700 } 701 } 702 } 703 704 // Update status according to all errors seen while formatting 705 if (signalErrors) { 706 context.checkErrors(status); 707 } 708 if (U_FAILURE(status)) { 709 result.remove(); 710 } 711 return result; 712 } 713 714 // ---------------------------------------- 715 // Checking for resolution errors 716 717 void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const OptionMap& options, UErrorCode& status) const { 718 // Check the RHS of each option 719 for (int32_t i = 0; i < options.size(); i++) { 720 const Option& opt = options.getOption(i, status); 721 CHECK_ERROR(status); 722 check(context, localEnv, opt.getValue(), status); 723 } 724 } 725 726 void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const Operand& rand, UErrorCode& status) const { 727 // Nothing to check for literals 728 if (rand.isLiteral() || rand.isNull()) { 729 return; 730 } 731 732 // Check that variable is in scope 733 const VariableName& var = rand.asVariable(); 734 UnicodeString normalized = StandardFunctions::normalizeNFC(var); 735 736 // Check local scope 737 if (localEnv.has(normalized)) { 738 return; 739 } 740 // Check global scope 741 context.getGlobal(normalized, status); 742 if (status == U_ILLEGAL_ARGUMENT_ERROR) { 743 status = U_ZERO_ERROR; 744 context.getErrors().setUnresolvedVariable(var, status); 745 } 746 // Either `var` is a global, or some other error occurred. 747 // Nothing more to do either way 748 return; 749 } 750 751 void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const Expression& expr, UErrorCode& status) const { 752 // Check for unresolved variable errors 753 if (expr.isFunctionCall()) { 754 const Operator* rator = expr.getOperator(status); 755 U_ASSERT(U_SUCCESS(status)); 756 const Operand& rand = expr.getOperand(); 757 check(context, localEnv, rand, status); 758 check(context, localEnv, rator->getOptionsInternal(), status); 759 } 760 } 761 762 // Check for resolution errors 763 void MessageFormatter::checkDeclarations(MessageContext& context, Environment*& env, UErrorCode &status) const { 764 CHECK_ERROR(status); 765 766 const Binding* decls = getDataModel().getLocalVariablesInternal(); 767 U_ASSERT(env != nullptr && (decls != nullptr || getDataModel().bindingsLen == 0)); 768 769 for (int32_t i = 0; i < getDataModel().bindingsLen; i++) { 770 const Binding& decl = decls[i]; 771 const Expression& rhs = decl.getValue(); 772 check(context, *env, rhs, status); 773 774 // Add a closure to the global environment, 775 // memoizing the value of localEnv up to this point 776 777 // Add the LHS to the environment for checking the next declaration 778 env = Environment::create(StandardFunctions::normalizeNFC(decl.getVariable()), 779 Closure(rhs, *env), 780 env, 781 status); 782 CHECK_ERROR(status); 783 } 784 } 785 } // namespace message2 786 787 U_NAMESPACE_END 788 789 #endif /* #if !UCONFIG_NO_MF2 */ 790 791 #endif /* #if !UCONFIG_NO_FORMATTING */ 792 793 #endif /* #if !UCONFIG_NO_NORMALIZATION */