tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

messageformat2.cpp (33768B)


      1 // © 2024 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #if !UCONFIG_NO_NORMALIZATION
      7 
      8 #if !UCONFIG_NO_FORMATTING
      9 
     10 #if !UCONFIG_NO_MF2
     11 
     12 #include "unicode/messageformat2_arguments.h"
     13 #include "unicode/messageformat2_data_model.h"
     14 #include "unicode/messageformat2_formattable.h"
     15 #include "unicode/messageformat2.h"
     16 #include "unicode/normalizer2.h"
     17 #include "unicode/unistr.h"
     18 #include "messageformat2_allocation.h"
     19 #include "messageformat2_checker.h"
     20 #include "messageformat2_evaluation.h"
     21 #include "messageformat2_function_registry_internal.h"
     22 #include "messageformat2_macros.h"
     23 
     24 
     25 U_NAMESPACE_BEGIN
     26 
     27 namespace message2 {
     28 
     29 using namespace data_model;
     30 
     31 // ------------------------------------------------------
     32 // Formatting
     33 
     34 // The result of formatting a literal is just itself.
     35 static Formattable evalLiteral(const Literal& lit) {
     36    return Formattable(lit.unquoted());
     37 }
     38 
     39 // Assumes that `var` is a message argument; returns the argument's value.
     40 [[nodiscard]] FormattedPlaceholder MessageFormatter::evalArgument(const UnicodeString& fallback,
     41                                                                  const VariableName& var,
     42                                                                  MessageContext& context,
     43                                                                  UErrorCode& errorCode) const {
     44    if (U_SUCCESS(errorCode)) {
     45        const Formattable* val = context.getGlobal(var, errorCode);
     46        if (U_SUCCESS(errorCode)) {
     47            // Note: the fallback string has to be passed in because in a declaration like:
     48            // .local $foo = {$bar :number}
     49            // the fallback for $bar is "$foo".
     50            UnicodeString fallbackToUse = fallback;
     51            if (fallbackToUse.isEmpty()) {
     52                fallbackToUse += DOLLAR;
     53                fallbackToUse += var;
     54            }
     55            return (FormattedPlaceholder(*val, fallbackToUse));
     56        }
     57    }
     58    return {};
     59 }
     60 
     61 // Helper function to re-escape any escaped-char characters
     62 static UnicodeString reserialize(const UnicodeString& s) {
     63    UnicodeString result(PIPE);
     64    for (int32_t i = 0; i < s.length(); i++) {
     65        switch(s[i]) {
     66        case BACKSLASH:
     67        case PIPE:
     68        case LEFT_CURLY_BRACE:
     69        case RIGHT_CURLY_BRACE: {
     70            result += BACKSLASH;
     71            break;
     72        }
     73        default:
     74            break;
     75        }
     76        result += s[i];
     77    }
     78    result += PIPE;
     79    return result;
     80 }
     81 
     82 // Returns the contents of the literal
     83 [[nodiscard]] FormattedPlaceholder MessageFormatter::formatLiteral(const UnicodeString& fallback,
     84                                                                   const Literal& lit) const {
     85    // The fallback for a literal is itself, unless another fallback is passed in
     86    // (same reasoning as evalArgument())
     87    UnicodeString fallbackToUse = fallback.isEmpty() ? reserialize(lit.unquoted()) : fallback;
     88    return FormattedPlaceholder(evalLiteral(lit), fallbackToUse);
     89 }
     90 
     91 [[nodiscard]] InternalValue* MessageFormatter::formatOperand(const UnicodeString& fallback,
     92                                                             const Environment& env,
     93                                                             const Operand& rand,
     94                                                             MessageContext& context,
     95                                                             UErrorCode &status) const {
     96    if (U_FAILURE(status)) {
     97        return {};
     98    }
     99 
    100    if (rand.isNull()) {
    101        return create<InternalValue>(InternalValue(FormattedPlaceholder()), status);
    102    }
    103    if (rand.isVariable()) {
    104        // Check if it's local or global
    105        // Note: there is no name shadowing; this is enforced by the parser
    106        const VariableName& var = rand.asVariable();
    107        // TODO: Currently, this code implements lazy evaluation of locals.
    108        // That is, the environment binds names to a closure, not a resolved value.
    109        // Eager vs. lazy evaluation is an open issue:
    110        // see https://github.com/unicode-org/message-format-wg/issues/299
    111 
    112        // NFC-normalize the variable name. See
    113        // https://github.com/unicode-org/message-format-wg/blob/main/spec/syntax.md#names-and-identifiers
    114        const VariableName normalized = StandardFunctions::normalizeNFC(var);
    115 
    116        // Look up the variable in the environment
    117        if (env.has(normalized)) {
    118          // `var` is a local -- look it up
    119          const Closure& rhs = env.lookup(normalized);
    120          // Format the expression using the environment from the closure
    121          // The name of this local variable is the fallback for its RHS.
    122          UnicodeString newFallback(DOLLAR);
    123          newFallback += var;
    124          return formatExpression(newFallback, rhs.getEnv(), rhs.getExpr(), context, status);
    125        }
    126        // Variable wasn't found in locals -- check if it's global
    127        FormattedPlaceholder result = evalArgument(fallback, normalized, context, status);
    128        if (status == U_ILLEGAL_ARGUMENT_ERROR) {
    129            status = U_ZERO_ERROR;
    130            // Unbound variable -- set a resolution error
    131            context.getErrors().setUnresolvedVariable(var, status);
    132            // Use fallback per
    133            // https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution
    134            UnicodeString str(DOLLAR);
    135            str += var;
    136            return create<InternalValue>(InternalValue(FormattedPlaceholder(str)), status);
    137        }
    138        return create<InternalValue>(InternalValue(std::move(result)), status);
    139    } else {
    140        U_ASSERT(rand.isLiteral());
    141        return create<InternalValue>(InternalValue(formatLiteral(fallback, rand.asLiteral())), status);
    142    }
    143 }
    144 
    145 // Resolves a function's options
    146 FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const OptionMap& options, MessageContext& context, UErrorCode& status) const {
    147    LocalPointer<UVector> optionsVector(createUVector(status));
    148    if (U_FAILURE(status)) {
    149        return {};
    150    }
    151    LocalPointer<ResolvedFunctionOption> resolvedOpt;
    152    for (int i = 0; i < options.size(); i++) {
    153        const Option& opt = options.getOption(i, status);
    154        if (U_FAILURE(status)) {
    155            return {};
    156        }
    157        const UnicodeString& k = opt.getName();
    158        const Operand& v = opt.getValue();
    159 
    160        // Options are fully evaluated before calling the function
    161        // Format the operand
    162        LocalPointer<InternalValue> rhsVal(formatOperand({}, env, v, context, status));
    163        if (U_FAILURE(status)) {
    164            return {};
    165        }
    166        // Note: this means option values are "eagerly" evaluated.
    167        // Currently, options don't have options. This will be addressed by the
    168        // full FormattedPlaceholder redesign.
    169        FormattedPlaceholder optValue = rhsVal->forceFormatting(context.getErrors(), status);
    170        resolvedOpt.adoptInstead(create<ResolvedFunctionOption>
    171                                 (ResolvedFunctionOption(k,
    172                                                         optValue.asFormattable(),
    173                                                         v.isLiteral()),
    174                                  status));
    175        if (U_FAILURE(status)) {
    176            return {};
    177        }
    178        optionsVector->adoptElement(resolvedOpt.orphan(), status);
    179    }
    180    return FunctionOptions(std::move(*optionsVector), status);
    181 }
    182 
    183 // Overload that dispatches on argument type. Syntax doesn't provide for options in this case.
    184 [[nodiscard]] InternalValue* MessageFormatter::evalFunctionCall(FormattedPlaceholder&& argument,
    185                                                                MessageContext& context,
    186                                                                UErrorCode& status) const {
    187    if (U_FAILURE(status)) {
    188        return nullptr;
    189    }
    190 
    191    // These cases should have been checked for already
    192    U_ASSERT(!argument.isFallback() && !argument.isNullOperand());
    193 
    194    const Formattable& toFormat = argument.asFormattable();
    195    switch (toFormat.getType()) {
    196    case UFMT_OBJECT: {
    197        const FormattableObject* obj = toFormat.getObject(status);
    198        U_ASSERT(U_SUCCESS(status));
    199        U_ASSERT(obj != nullptr);
    200        const UnicodeString& type = obj->tag();
    201        FunctionName functionName;
    202        if (!getDefaultFormatterNameByType(type, functionName)) {
    203            // No formatter for this type -- follow default behavior
    204            break;
    205        }
    206        return evalFunctionCall(functionName,
    207                                create<InternalValue>(std::move(argument), status),
    208                                FunctionOptions(),
    209                                context,
    210                                status);
    211    }
    212    default: {
    213        // TODO: The array case isn't handled yet; not sure whether it's desirable
    214        // to have a default list formatter
    215        break;
    216    }
    217    }
    218    // No formatter for this type, or it's a primitive type (which will be formatted later)
    219    // -- just return the argument itself
    220    return create<InternalValue>(std::move(argument), status);
    221 }
    222 
    223 // Overload that dispatches on function name
    224 // Adopts `arg`
    225 [[nodiscard]] InternalValue* MessageFormatter::evalFunctionCall(const FunctionName& functionName,
    226                                                                InternalValue* arg_,
    227                                                                FunctionOptions&& options,
    228                                                                MessageContext& context,
    229                                                                UErrorCode& status) const {
    230    if (U_FAILURE(status)) {
    231        return {};
    232    }
    233 
    234    LocalPointer<InternalValue> arg(arg_);
    235 
    236    // Look up the formatter or selector
    237    LocalPointer<Formatter> formatterImpl(nullptr);
    238    LocalPointer<Selector> selectorImpl(nullptr);
    239    if (isFormatter(functionName)) {
    240        formatterImpl.adoptInstead(getFormatter(functionName, status));
    241        U_ASSERT(U_SUCCESS(status));
    242    }
    243    if (isSelector(functionName)) {
    244        selectorImpl.adoptInstead(getSelector(context, functionName, status));
    245        U_ASSERT(U_SUCCESS(status));
    246    }
    247    if (formatterImpl == nullptr && selectorImpl == nullptr) {
    248        // Unknown function error
    249        context.getErrors().setUnknownFunction(functionName, status);
    250 
    251        if (arg->hasNullOperand()) {
    252            // Non-selector used as selector; an error would have been recorded earlier
    253            UnicodeString fallback(COLON);
    254            fallback += functionName;
    255            return new InternalValue(FormattedPlaceholder(fallback));
    256        } else {
    257            return new InternalValue(FormattedPlaceholder(arg->getFallback()));
    258        }
    259    }
    260    return new InternalValue(arg.orphan(),
    261                             std::move(options),
    262                             functionName,
    263                             formatterImpl.isValid() ? formatterImpl.orphan() : nullptr,
    264                             selectorImpl.isValid() ? selectorImpl.orphan() : nullptr);
    265 }
    266 
    267 // Formats an expression using `globalEnv` for the values of variables
    268 [[nodiscard]] InternalValue* MessageFormatter::formatExpression(const UnicodeString& fallback,
    269                                                                const Environment& globalEnv,
    270                                                                const Expression& expr,
    271                                                                MessageContext& context,
    272                                                                UErrorCode &status) const {
    273    if (U_FAILURE(status)) {
    274        return {};
    275    }
    276 
    277    const Operand& rand = expr.getOperand();
    278    // Format the operand (formatOperand handles the case of a null operand)
    279    LocalPointer<InternalValue> randVal(formatOperand(fallback, globalEnv, rand, context, status));
    280 
    281    FormattedPlaceholder maybeRand = randVal->takeArgument(status);
    282 
    283    if (!expr.isFunctionCall() && U_SUCCESS(status)) {
    284        // Dispatch based on type of `randVal`
    285         if (maybeRand.isFallback()) {
    286            return randVal.orphan();
    287        }
    288        return evalFunctionCall(std::move(maybeRand), context, status);
    289    } else if (expr.isFunctionCall()) {
    290        status = U_ZERO_ERROR;
    291        const Operator* rator = expr.getOperator(status);
    292        U_ASSERT(U_SUCCESS(status));
    293        const FunctionName& functionName = rator->getFunctionName();
    294        const OptionMap& options = rator->getOptionsInternal();
    295        // Resolve the options
    296        FunctionOptions resolvedOptions = resolveOptions(globalEnv, options, context, status);
    297 
    298        // Call the formatter function
    299        return evalFunctionCall(functionName,
    300                                randVal.orphan(),
    301                                std::move(resolvedOptions),
    302                                context,
    303                                status);
    304    } else {
    305        status = U_ZERO_ERROR;
    306        return randVal.orphan();
    307    }
    308 }
    309 
    310 // Formats each text and expression part of a pattern, appending the results to `result`
    311 void MessageFormatter::formatPattern(MessageContext& context, const Environment& globalEnv, const Pattern& pat, UErrorCode &status, UnicodeString& result) const {
    312    CHECK_ERROR(status);
    313 
    314    for (int32_t i = 0; i < pat.numParts(); i++) {
    315        const PatternPart& part = pat.getPart(i);
    316        if (part.isText()) {
    317            result += part.asText();
    318        } else if (part.isMarkup()) {
    319            // Markup is ignored
    320        } else {
    321       // Format the expression
    322              LocalPointer<InternalValue> partVal(
    323                  formatExpression({}, globalEnv, part.contents(), context, status));
    324              FormattedPlaceholder partResult = partVal->forceFormatting(context.getErrors(),
    325                                                                         status);
    326              // Force full evaluation, e.g. applying default formatters to
    327       // unformatted input (or formatting numbers as strings)
    328              result += partResult.formatToString(locale, status);
    329              // Handle formatting errors. `formatToString()` can't take a context and thus can't
    330              // register an error directly
    331              if (status == U_MF_FORMATTING_ERROR) {
    332                  status = U_ZERO_ERROR;
    333                  // TODO: The name of the formatter that failed is unavailable.
    334                  // Not ideal, but it's hard for `formatToString()`
    335                  // to pass along more detailed diagnostics
    336                  context.getErrors().setFormattingError(status);
    337              }
    338        }
    339    }
    340 }
    341 
    342 // ------------------------------------------------------
    343 // Selection
    344 
    345 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-selectors
    346 // `res` is a vector of ResolvedSelectors
    347 void MessageFormatter::resolveSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UVector& res) const {
    348    CHECK_ERROR(status);
    349    U_ASSERT(!dataModel.hasPattern());
    350 
    351    const VariableName* selectors = dataModel.getSelectorsInternal();
    352    // 1. Let res be a new empty list of resolved values that support selection.
    353    // (Implicit, since `res` is an out-parameter)
    354    // 2. For each expression exp of the message's selectors
    355    for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
    356        // 2i. Let rv be the resolved value of exp.
    357        LocalPointer<InternalValue> rv(formatOperand({}, env, Operand(selectors[i]), context, status));
    358        if (rv->canSelect()) {
    359            // 2ii. If selection is supported for rv:
    360            // (True if this code has been reached)
    361        } else {
    362            // 2iii. Else:
    363            // Let nomatch be a resolved value for which selection always fails.
    364            // Append nomatch as the last element of the list res.
    365            // Emit a Selection Error.
    366            // (Note: in this case, rv, being a fallback, serves as `nomatch`)
    367            DynamicErrors& err = context.getErrors();
    368            err.setSelectorError(rv->getFunctionName(), status);
    369            rv.adoptInstead(new InternalValue(FormattedPlaceholder(rv->getFallback())));
    370            if (!rv.isValid()) {
    371                status = U_MEMORY_ALLOCATION_ERROR;
    372                return;
    373            }
    374        }
    375        // 2ii(a). Append rv as the last element of the list res.
    376        // (Also fulfills 2iii)
    377        res.adoptElement(rv.orphan(), status);
    378    }
    379 }
    380 
    381 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-preferences
    382 // `keys` and `matches` are vectors of strings
    383 void MessageFormatter::matchSelectorKeys(const UVector& keys,
    384                                         MessageContext& context,
    385 				 InternalValue* rv, // Does not adopt `rv`
    386 				 UVector& keysOut,
    387 				 UErrorCode& status) const {
    388    CHECK_ERROR(status);
    389 
    390    if (U_FAILURE(status)) {
    391        // Return an empty list of matches
    392        status = U_ZERO_ERROR;
    393        return;
    394    }
    395 
    396    UErrorCode savedStatus = status;
    397 
    398    // Convert `keys` to an array
    399    int32_t keysLen = keys.size();
    400    UnicodeString* keysArr = new UnicodeString[keysLen];
    401    if (keysArr == nullptr) {
    402        status = U_MEMORY_ALLOCATION_ERROR;
    403        return;
    404    }
    405    for (int32_t i = 0; i < keysLen; i++) {
    406        const UnicodeString* k = static_cast<UnicodeString*>(keys[i]);
    407        U_ASSERT(k != nullptr);
    408        keysArr[i] = *k;
    409    }
    410    LocalArray<UnicodeString> adoptedKeys(keysArr);
    411 
    412    // Create an array to hold the output
    413    UnicodeString* prefsArr = new UnicodeString[keysLen];
    414    if (prefsArr == nullptr) {
    415        status = U_MEMORY_ALLOCATION_ERROR;
    416        return;
    417    }
    418    LocalArray<UnicodeString> adoptedPrefs(prefsArr);
    419    int32_t prefsLen = 0;
    420 
    421    // Call the selector
    422    FunctionName name = rv->getFunctionName();
    423    rv->forceSelection(context.getErrors(),
    424                       adoptedKeys.getAlias(), keysLen,
    425                       adoptedPrefs.getAlias(), prefsLen,
    426                       status);
    427 
    428    // Update errors
    429    if (savedStatus != status) {
    430        if (U_FAILURE(status)) {
    431            status = U_ZERO_ERROR;
    432            context.getErrors().setSelectorError(name, status);
    433        } else {
    434            // Ignore warnings
    435            status = savedStatus;
    436        }
    437    }
    438 
    439    CHECK_ERROR(status);
    440 
    441    // Copy the resulting keys (if there was no error)
    442    keysOut.removeAllElements();
    443    for (int32_t i = 0; i < prefsLen; i++) {
    444        UnicodeString* k = message2::create<UnicodeString>(std::move(prefsArr[i]), status);
    445        if (k == nullptr) {
    446            status = U_MEMORY_ALLOCATION_ERROR;
    447            return;
    448        }
    449        keysOut.adoptElement(k, status);
    450        CHECK_ERROR(status);
    451    }
    452 }
    453 
    454 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-preferences
    455 // `res` is a vector of FormattedPlaceholders;
    456 // `pref` is a vector of vectors of strings
    457 void MessageFormatter::resolvePreferences(MessageContext& context, UVector& res, UVector& pref, UErrorCode &status) const {
    458    CHECK_ERROR(status);
    459 
    460    // 1. Let pref be a new empty list of lists of strings.
    461    // (Implicit, since `pref` is an out-parameter)
    462    UnicodeString ks;
    463    LocalPointer<UnicodeString> ksP;
    464    int32_t numVariants = dataModel.numVariants();
    465    const Variant* variants = dataModel.getVariantsInternal();
    466    // 2. For each index i in res
    467    for (int32_t i = 0; i < res.size(); i++) {
    468        // 2i. Let keys be a new empty list of strings.
    469        LocalPointer<UVector> keys(createUVector(status));
    470        CHECK_ERROR(status);
    471        // 2ii. For each variant `var` of the message
    472        for (int32_t variantNum = 0; variantNum < numVariants; variantNum++) {
    473            const SelectorKeys& selectorKeys = variants[variantNum].getKeys();
    474 
    475            // Note: Here, `var` names the key list of `var`,
    476            // not a Variant itself
    477            const Key* var = selectorKeys.getKeysInternal();
    478            // 2ii(a). Let `key` be the `var` key at position i.
    479            U_ASSERT(i < selectorKeys.len); // established by semantic check in formatSelectors()
    480            const Key& key = var[i];
    481            // 2ii(b). If `key` is not the catch-all key '*'
    482            if (!key.isWildcard()) {
    483                // 2ii(b)(a) Assert that key is a literal.
    484                // (Not needed)
    485                // 2ii(b)(b) Let `ks` be the resolved value of `key` in Unicode Normalization Form C.
    486                ks = StandardFunctions::normalizeNFC(key.asLiteral().unquoted());
    487                // 2ii(b)(c) Append `ks` as the last element of the list `keys`.
    488                ksP.adoptInstead(create<UnicodeString>(std::move(ks), status));
    489                CHECK_ERROR(status);
    490                keys->adoptElement(ksP.orphan(), status);
    491            }
    492        }
    493        // 2iii. Let `rv` be the resolved value at index `i` of `res`.
    494        U_ASSERT(i < res.size());
    495        InternalValue* rv = static_cast<InternalValue*>(res[i]);
    496        // 2iv. Let matches be the result of calling the method MatchSelectorKeys(rv, keys)
    497        LocalPointer<UVector> matches(createUVector(status));
    498        matchSelectorKeys(*keys, context, std::move(rv), *matches, status);
    499        // 2v. Append `matches` as the last element of the list `pref`
    500        pref.adoptElement(matches.orphan(), status);
    501    }
    502 }
    503 
    504 // `v` is assumed to be a vector of strings
    505 static int32_t vectorFind(const UVector& v, const UnicodeString& k) {
    506    for (int32_t i = 0; i < v.size(); i++) {
    507        if (*static_cast<UnicodeString*>(v[i]) == k) {
    508            return i;
    509        }
    510    }
    511    return -1;
    512 }
    513 
    514 static UBool vectorContains(const UVector& v, const UnicodeString& k) {
    515    return (vectorFind(v, k) != -1);
    516 }
    517 
    518 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#filter-variants
    519 // `pref` is a vector of vectors of strings. `vars` is a vector of PrioritizedVariants
    520 void MessageFormatter::filterVariants(const UVector& pref, UVector& vars, UErrorCode& status) const {
    521    const Variant* variants = dataModel.getVariantsInternal();
    522 
    523    // 1. Let `vars` be a new empty list of variants.
    524    // (Not needed since `vars` is an out-parameter)
    525    // 2. For each variant `var` of the message:
    526    for (int32_t j = 0; j < dataModel.numVariants(); j++) {
    527        const SelectorKeys& selectorKeys = variants[j].getKeys();
    528        const Pattern& p = variants[j].getPattern();
    529 
    530        // Note: Here, `var` names the key list of `var`,
    531        // not a Variant itself
    532        const Key* var = selectorKeys.getKeysInternal();
    533        // 2i. For each index `i` in `pref`:
    534        bool noMatch = false;
    535        for (int32_t i = 0; i < pref.size(); i++) {
    536            // 2i(a). Let `key` be the `var` key at position `i`.
    537            U_ASSERT(i < selectorKeys.len);
    538            const Key& key = var[i];
    539            // 2i(b). If key is the catch-all key '*':
    540            if (key.isWildcard()) {
    541                // 2i(b)(a). Continue the inner loop on pref.
    542                continue;
    543            }
    544            // 2i(c). Assert that `key` is a literal.
    545            // (Not needed)
    546            // 2i(d). Let `ks` be the resolved value of `key`.
    547            UnicodeString ks = StandardFunctions::normalizeNFC(key.asLiteral().unquoted());
    548            // 2i(e). Let `matches` be the list of strings at index `i` of `pref`.
    549            const UVector& matches = *(static_cast<UVector*>(pref[i])); // `matches` is a vector of strings
    550            // 2i(f). If `matches` includes `ks`
    551            if (vectorContains(matches, ks)) {
    552                // 2i(f)(a). Continue the inner loop on `pref`.
    553                continue;
    554            }
    555            // 2i(g). Else:
    556            // 2i(g)(a). Continue the outer loop on message variants.
    557            noMatch = true;
    558            break;
    559        }
    560        if (!noMatch) {
    561            // Append `var` as the last element of the list `vars`.
    562     PrioritizedVariant* tuple = create<PrioritizedVariant>(PrioritizedVariant(-1, selectorKeys, p), status);
    563            CHECK_ERROR(status);
    564            vars.adoptElement(tuple, status);
    565        }
    566    }
    567 }
    568 
    569 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#sort-variants
    570 // Leaves the preferred variant as element 0 in `sortable`
    571 // Note: this sorts in-place, so `sortable` is just `vars`
    572 // `pref` is a vector of vectors of strings; `vars` is a vector of PrioritizedVariants
    573 void MessageFormatter::sortVariants(const UVector& pref, UVector& vars, UErrorCode& status) const {
    574    CHECK_ERROR(status);
    575 
    576 // Note: steps 1 and 2 are omitted since we use `vars` as `sortable` (we sort in-place)
    577    // 1. Let `sortable` be a new empty list of (integer, variant) tuples.
    578    // (Not needed since `sortable` is an out-parameter)
    579    // 2. For each variant `var` of `vars`
    580    // 2i. Let tuple be a new tuple (-1, var).
    581    // 2ii. Append `tuple` as the last element of the list `sortable`.
    582 
    583    // 3. Let `len` be the integer count of items in `pref`.
    584    int32_t len = pref.size();
    585    // 4. Let `i` be `len` - 1.
    586    int32_t i = len - 1;
    587    // 5. While i >= 0:
    588    while (i >= 0) {
    589        // 5i. Let `matches` be the list of strings at index `i` of `pref`.
    590        U_ASSERT(pref[i] != nullptr);
    591 const UVector& matches = *(static_cast<UVector*>(pref[i])); // `matches` is a vector of strings
    592        // 5ii. Let `minpref` be the integer count of items in `matches`.
    593        int32_t minpref = matches.size();
    594        // 5iii. For each tuple `tuple` of `sortable`:
    595        for (int32_t j = 0; j < vars.size(); j++) {
    596            U_ASSERT(vars[j] != nullptr);
    597            PrioritizedVariant& tuple = *(static_cast<PrioritizedVariant*>(vars[j]));
    598            // 5iii(a). Let matchpref be an integer with the value minpref.
    599            int32_t matchpref = minpref;
    600            // 5iii(b). Let `key` be the tuple variant key at position `i`.
    601            const Key* tupleVariantKeys = tuple.keys.getKeysInternal();
    602            U_ASSERT(i < tuple.keys.len); // Given by earlier semantic checking
    603            const Key& key = tupleVariantKeys[i];
    604            // 5iii(c) If `key` is not the catch-all key '*':
    605            if (!key.isWildcard()) {
    606                // 5iii(c)(a). Assert that `key` is a literal.
    607                // (Not needed)
    608                // 5iii(c)(b). Let `ks` be the resolved value of `key`.
    609                UnicodeString ks = StandardFunctions::normalizeNFC(key.asLiteral().unquoted());
    610                // 5iii(c)(c) Let matchpref be the integer position of ks in `matches`.
    611                matchpref = vectorFind(matches, ks);
    612                U_ASSERT(matchpref >= 0);
    613            }
    614            // 5iii(d) Set the `tuple` integer value as matchpref.
    615            tuple.priority = matchpref;
    616        }
    617        // 5iv. Set `sortable` to be the result of calling the method SortVariants(`sortable`)
    618        vars.sort(comparePrioritizedVariants, status);
    619        CHECK_ERROR(status);
    620        // 5v. Set `i` to be `i` - 1.
    621        i--;
    622    }
    623    // The caller is responsible for steps 6 and 7
    624    // 6. Let `var` be the `variant` element of the first element of `sortable`.
    625    // 7. Select the pattern of `var`
    626 }
    627 
    628 void MessageFormatter::formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const {
    629    CHECK_ERROR(status);
    630 
    631    // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection
    632 
    633    // Resolve Selectors
    634    // res is a vector of InternalValues
    635    LocalPointer<UVector> res(createUVector(status));
    636    CHECK_ERROR(status);
    637    resolveSelectors(context, env, status, *res);
    638 
    639    // Resolve Preferences
    640    // pref is a vector of vectors of strings
    641    LocalPointer<UVector> pref(createUVector(status));
    642    CHECK_ERROR(status);
    643    resolvePreferences(context, *res, *pref, status);
    644 
    645    // Filter Variants
    646    // vars is a vector of PrioritizedVariants
    647    LocalPointer<UVector> vars(createUVector(status));
    648    CHECK_ERROR(status);
    649    filterVariants(*pref, *vars, status);
    650 
    651    // Sort Variants and select the final pattern
    652    // Note: `sortable` in the spec is just `vars` here,
    653    // which is sorted in-place
    654    sortVariants(*pref, *vars, status);
    655 
    656    CHECK_ERROR(status);
    657 
    658    // 6. Let `var` be the `variant` element of the first element of `sortable`.
    659    U_ASSERT(vars->size() > 0); // This should have been checked earlier (having 0 variants would be a data model error)
    660    const PrioritizedVariant& var = *(static_cast<PrioritizedVariant*>(vars->elementAt(0)));
    661    // 7. Select the pattern of `var`
    662    const Pattern& pat = var.pat;
    663 
    664    // Format the pattern
    665    formatPattern(context, env, pat, status, result);
    666 }
    667 
    668 // Note: this is non-const due to the function registry being non-const, which is in turn
    669 // due to the values (`FormatterFactory` objects in the map) having mutable state.
    670 // In other words, formatting a message can mutate the underlying `MessageFormatter` by changing
    671 // state within the factory objects that represent custom formatters.
    672 UnicodeString MessageFormatter::formatToString(const MessageArguments& arguments, UErrorCode &status) {
    673    EMPTY_ON_ERROR(status);
    674 
    675    // Create a new context with the given arguments and the `errors` structure
    676    MessageContext context(arguments, *errors, status);
    677    UnicodeString result;
    678 
    679    if (!(errors->hasSyntaxError() || errors->hasDataModelError())) {
    680        // Create a new environment that will store closures for all local variables
    681        // Check for unresolved variable errors
    682        // checkDeclarations needs a reference to the pointer to the environment
    683        // since it uses its `env` argument as an out-parameter. So it needs to be
    684        // temporarily not a LocalPointer...
    685        Environment* env(Environment::create(status));
    686        checkDeclarations(context, env, status);
    687        // ...and then it's adopted to avoid leaks
    688        LocalPointer<Environment> globalEnv(env);
    689 
    690        if (dataModel.hasPattern()) {
    691            formatPattern(context, *globalEnv, dataModel.getPattern(), status, result);
    692        } else {
    693            // Check for errors/warnings -- if so, then the result of pattern selection is the fallback value
    694            // See https://www.unicode.org/reports/tr35/tr35-messageFormat.html#pattern-selection
    695            const DynamicErrors& err = context.getErrors();
    696            if (err.hasSyntaxError() || err.hasDataModelError()) {
    697                result += REPLACEMENT;
    698            } else {
    699                formatSelectors(context, *globalEnv, status, result);
    700            }
    701        }
    702    }
    703 
    704    // Update status according to all errors seen while formatting
    705    if (signalErrors) {
    706        context.checkErrors(status);
    707    }
    708    if (U_FAILURE(status)) {
    709        result.remove();
    710    }
    711    return result;
    712 }
    713 
    714 // ----------------------------------------
    715 // Checking for resolution errors
    716 
    717 void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const OptionMap& options, UErrorCode& status) const {
    718    // Check the RHS of each option
    719    for (int32_t i = 0; i < options.size(); i++) {
    720        const Option& opt = options.getOption(i, status);
    721        CHECK_ERROR(status);
    722        check(context, localEnv, opt.getValue(), status);
    723    }
    724 }
    725 
    726 void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const Operand& rand, UErrorCode& status) const {
    727    // Nothing to check for literals
    728    if (rand.isLiteral() || rand.isNull()) {
    729        return;
    730    }
    731 
    732    // Check that variable is in scope
    733    const VariableName& var = rand.asVariable();
    734    UnicodeString normalized = StandardFunctions::normalizeNFC(var);
    735 
    736    // Check local scope
    737    if (localEnv.has(normalized)) {
    738        return;
    739    }
    740    // Check global scope
    741    context.getGlobal(normalized, status);
    742    if (status == U_ILLEGAL_ARGUMENT_ERROR) {
    743        status = U_ZERO_ERROR;
    744        context.getErrors().setUnresolvedVariable(var, status);
    745    }
    746    // Either `var` is a global, or some other error occurred.
    747    // Nothing more to do either way
    748    return;
    749 }
    750 
    751 void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const Expression& expr, UErrorCode& status) const {
    752    // Check for unresolved variable errors
    753    if (expr.isFunctionCall()) {
    754        const Operator* rator = expr.getOperator(status);
    755        U_ASSERT(U_SUCCESS(status));
    756        const Operand& rand = expr.getOperand();
    757        check(context, localEnv, rand, status);
    758        check(context, localEnv, rator->getOptionsInternal(), status);
    759    }
    760 }
    761 
    762 // Check for resolution errors
    763 void MessageFormatter::checkDeclarations(MessageContext& context, Environment*& env, UErrorCode &status) const {
    764    CHECK_ERROR(status);
    765 
    766    const Binding* decls = getDataModel().getLocalVariablesInternal();
    767    U_ASSERT(env != nullptr && (decls != nullptr || getDataModel().bindingsLen == 0));
    768 
    769    for (int32_t i = 0; i < getDataModel().bindingsLen; i++) {
    770        const Binding& decl = decls[i];
    771        const Expression& rhs = decl.getValue();
    772        check(context, *env, rhs, status);
    773 
    774        // Add a closure to the global environment,
    775        // memoizing the value of localEnv up to this point
    776 
    777        // Add the LHS to the environment for checking the next declaration
    778        env = Environment::create(StandardFunctions::normalizeNFC(decl.getVariable()),
    779                                  Closure(rhs, *env),
    780                                  env,
    781                                  status);
    782        CHECK_ERROR(status);
    783    }
    784 }
    785 } // namespace message2
    786 
    787 U_NAMESPACE_END
    788 
    789 #endif /* #if !UCONFIG_NO_MF2 */
    790 
    791 #endif /* #if !UCONFIG_NO_FORMATTING */
    792 
    793 #endif /* #if !UCONFIG_NO_NORMALIZATION */