tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

messageformat2_formatter.cpp (14267B)


      1 // © 2024 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #if !UCONFIG_NO_NORMALIZATION
      7 
      8 #if !UCONFIG_NO_FORMATTING
      9 
     10 #if !UCONFIG_NO_MF2
     11 
     12 #include "unicode/messageformat2.h"
     13 #include "messageformat2_allocation.h"
     14 #include "messageformat2_checker.h"
     15 #include "messageformat2_errors.h"
     16 #include "messageformat2_evaluation.h"
     17 #include "messageformat2_function_registry_internal.h"
     18 #include "messageformat2_macros.h"
     19 #include "messageformat2_parser.h"
     20 #include "messageformat2_serializer.h"
     21 #include "uvector.h" // U_ASSERT
     22 
     23 U_NAMESPACE_BEGIN
     24 
     25 namespace message2 {
     26 
     27    // MessageFormatter::Builder
     28 
     29    // -------------------------------------
     30    // Creates a MessageFormat instance based on the pattern.
     31 
     32    void MessageFormatter::Builder::clearState() {
     33        normalizedInput.remove();
     34        delete errors;
     35        errors = nullptr;
     36    }
     37 
     38    MessageFormatter::Builder& MessageFormatter::Builder::setPattern(const UnicodeString& pat,
     39                                                                     UParseError& parseError,
     40                                                                     UErrorCode& errorCode) {
     41        clearState();
     42        // Create errors
     43        errors = create<StaticErrors>(StaticErrors(errorCode), errorCode);
     44        THIS_ON_ERROR(errorCode);
     45 
     46        // Parse the pattern
     47        MFDataModel::Builder tree(errorCode);
     48        Parser(pat, tree, *errors, normalizedInput, errorCode)
     49            .parse(parseError, errorCode);
     50 
     51        // Fail on syntax errors
     52        if (errors->hasSyntaxError()) {
     53            errors->checkErrors(errorCode);
     54            // Check that the checkErrors() method set the error code
     55            U_ASSERT(U_FAILURE(errorCode));
     56        }
     57 
     58        // Build the data model based on what was parsed
     59        dataModel = tree.build(errorCode);
     60        hasDataModel = true;
     61        hasPattern = true;
     62        pattern = pat;
     63 
     64        return *this;
     65    }
     66 
     67    // Precondition: `reg` is non-null
     68    // Does not adopt `reg`
     69    MessageFormatter::Builder& MessageFormatter::Builder::setFunctionRegistry(const MFFunctionRegistry& reg) {
     70        customMFFunctionRegistry = &reg;
     71        return *this;
     72    }
     73 
     74    MessageFormatter::Builder& MessageFormatter::Builder::setLocale(const Locale& loc) {
     75        locale = loc;
     76        return *this;
     77    }
     78 
     79    MessageFormatter::Builder& MessageFormatter::Builder::setDataModel(MFDataModel&& newDataModel) {
     80        clearState();
     81        hasPattern = false;
     82        hasDataModel = true;
     83        dataModel = std::move(newDataModel);
     84 
     85        return *this;
     86    }
     87 
     88    MessageFormatter::Builder&
     89        MessageFormatter::Builder::setErrorHandlingBehavior(
     90           MessageFormatter::UMFErrorHandlingBehavior type) {
     91               signalErrors = type == U_MF_STRICT;
     92               return *this;
     93    }
     94 
     95    /*
     96      This build() method is non-destructive, which entails the risk that
     97      its borrowed MFFunctionRegistry and (if the setDataModel() method was called)
     98      MFDataModel pointers could become invalidated.
     99    */
    100    MessageFormatter MessageFormatter::Builder::build(UErrorCode& errorCode) const {
    101        return MessageFormatter(*this, errorCode);
    102    }
    103 
    104    MessageFormatter::Builder::Builder(UErrorCode& errorCode) : locale(Locale::getDefault()), customMFFunctionRegistry(nullptr) {
    105        // Initialize errors
    106        errors = new StaticErrors(errorCode);
    107        CHECK_ERROR(errorCode);
    108        if (errors == nullptr) {
    109            errorCode = U_MEMORY_ALLOCATION_ERROR;
    110        }
    111    }
    112 
    113    MessageFormatter::Builder::~Builder() {
    114        if (errors != nullptr) {
    115            delete errors;
    116            errors = nullptr;
    117        }
    118    }
    119 
    120    // MessageFormatter
    121 
    122    MessageFormatter::MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &success) : locale(builder.locale), customMFFunctionRegistry(builder.customMFFunctionRegistry) {
    123        CHECK_ERROR(success);
    124 
    125        // Set up the standard function registry
    126        MFFunctionRegistry::Builder standardFunctionsBuilder(success);
    127 
    128        FormatterFactory* dateTime = StandardFunctions::DateTimeFactory::dateTime(success);
    129        FormatterFactory* date = StandardFunctions::DateTimeFactory::date(success);
    130        FormatterFactory* time = StandardFunctions::DateTimeFactory::time(success);
    131        FormatterFactory* number = new StandardFunctions::NumberFactory();
    132        FormatterFactory* integer = new StandardFunctions::IntegerFactory();
    133        standardFunctionsBuilder.adoptFormatter(FunctionName(functions::DATETIME), dateTime, success)
    134            .adoptFormatter(FunctionName(functions::DATE), date, success)
    135            .adoptFormatter(FunctionName(functions::TIME), time, success)
    136            .adoptFormatter(FunctionName(functions::NUMBER), number, success)
    137            .adoptFormatter(FunctionName(functions::INTEGER), integer, success)
    138            .adoptFormatter(FunctionName(functions::TEST_FUNCTION), new StandardFunctions::TestFormatFactory(), success)
    139            .adoptFormatter(FunctionName(functions::TEST_FORMAT), new StandardFunctions::TestFormatFactory(), success)
    140            .adoptSelector(FunctionName(functions::NUMBER), new StandardFunctions::PluralFactory(UPLURAL_TYPE_CARDINAL), success)
    141            .adoptSelector(FunctionName(functions::INTEGER), new StandardFunctions::PluralFactory(StandardFunctions::PluralFactory::integer()), success)
    142            .adoptSelector(FunctionName(functions::STRING), new StandardFunctions::TextFactory(), success)
    143            .adoptSelector(FunctionName(functions::TEST_FUNCTION), new StandardFunctions::TestSelectFactory(), success)
    144            .adoptSelector(FunctionName(functions::TEST_SELECT), new StandardFunctions::TestSelectFactory(), success);
    145        CHECK_ERROR(success);
    146        standardMFFunctionRegistry = standardFunctionsBuilder.build();
    147        CHECK_ERROR(success);
    148        standardMFFunctionRegistry.checkStandard();
    149 
    150        normalizedInput = builder.normalizedInput;
    151        signalErrors = builder.signalErrors;
    152 
    153        // Build data model
    154        // First, check that there is a data model
    155        // (which might have been set by setDataModel(), or to
    156        // the data model parsed from the pattern by setPattern())
    157 
    158        if (!builder.hasDataModel) {
    159            success = U_INVALID_STATE_ERROR;
    160            return;
    161        }
    162 
    163        dataModel = builder.dataModel;
    164        if (builder.errors != nullptr) {
    165            errors = new StaticErrors(*builder.errors, success);
    166        } else {
    167            // Initialize errors
    168            LocalPointer<StaticErrors> errorsNew(new StaticErrors(success));
    169            CHECK_ERROR(success);
    170            errors = errorsNew.orphan();
    171        }
    172 
    173        // Note: we currently evaluate variables lazily,
    174        // without memoization. This call is still necessary
    175        // to check out-of-scope uses of local variables in
    176        // right-hand sides (unresolved variable errors can
    177        // only be checked when arguments are known)
    178 
    179        // Check for resolution errors
    180        Checker(dataModel, *errors, *this).check(success);
    181    }
    182 
    183    void MessageFormatter::cleanup() noexcept {
    184        if (errors != nullptr) {
    185            delete errors;
    186            errors = nullptr;
    187        }
    188    }
    189 
    190    MessageFormatter& MessageFormatter::operator=(MessageFormatter&& other) noexcept {
    191        cleanup();
    192 
    193        locale = std::move(other.locale);
    194        standardMFFunctionRegistry = std::move(other.standardMFFunctionRegistry);
    195        customMFFunctionRegistry = other.customMFFunctionRegistry;
    196        dataModel = std::move(other.dataModel);
    197        normalizedInput = std::move(other.normalizedInput);
    198        signalErrors = other.signalErrors;
    199        errors = other.errors;
    200        other.errors = nullptr;
    201        return *this;
    202    }
    203 
    204    const MFDataModel& MessageFormatter::getDataModel() const { return dataModel; }
    205 
    206    UnicodeString MessageFormatter::getPattern() const {
    207        // Converts the current data model back to a string
    208        UnicodeString result;
    209        Serializer serializer(getDataModel(), result);
    210        serializer.serialize();
    211        return result;
    212    }
    213 
    214    // Precondition: custom function registry exists
    215    const MFFunctionRegistry& MessageFormatter::getCustomMFFunctionRegistry() const {
    216        U_ASSERT(hasCustomMFFunctionRegistry());
    217        return *customMFFunctionRegistry;
    218    }
    219 
    220    MessageFormatter::~MessageFormatter() {
    221        cleanup();
    222    }
    223 
    224    // Selector and formatter lookup
    225    // -----------------------------
    226 
    227    // Postcondition: selector != nullptr || U_FAILURE(status)
    228    Selector* MessageFormatter::getSelector(MessageContext& context, const FunctionName& functionName, UErrorCode& status) const {
    229        NULL_ON_ERROR(status);
    230        U_ASSERT(isSelector(functionName));
    231 
    232        const SelectorFactory* selectorFactory = lookupSelectorFactory(context, functionName, status);
    233        NULL_ON_ERROR(status);
    234        if (selectorFactory == nullptr) {
    235            status = U_MEMORY_ALLOCATION_ERROR;
    236            return nullptr;
    237        }
    238        // Create a specific instance of the selector
    239        auto result = selectorFactory->createSelector(getLocale(), status);
    240        NULL_ON_ERROR(status);
    241        return result;
    242    }
    243 
    244    // Returns an owned pointer
    245    Formatter* MessageFormatter::getFormatter(const FunctionName& functionName, UErrorCode& status) const {
    246        NULL_ON_ERROR(status);
    247 
    248        // Create the formatter
    249 
    250        // First, look up the formatter factory for this function
    251        FormatterFactory* formatterFactory = lookupFormatterFactory(functionName, status);
    252        NULL_ON_ERROR(status);
    253 
    254        U_ASSERT(formatterFactory != nullptr);
    255 
    256        // Create a specific instance of the formatter
    257        Formatter* formatter = formatterFactory->createFormatter(locale, status);
    258        NULL_ON_ERROR(status);
    259        if (formatter == nullptr) {
    260            status = U_MEMORY_ALLOCATION_ERROR;
    261            return nullptr;
    262        }
    263        return formatter;
    264    }
    265 
    266    bool MessageFormatter::getDefaultFormatterNameByType(const UnicodeString& type,
    267                                                         FunctionName& name) const {
    268        if (!hasCustomMFFunctionRegistry()) {
    269            return false;
    270        }
    271        const MFFunctionRegistry& reg = getCustomMFFunctionRegistry();
    272        return reg.getDefaultFormatterNameByType(type, name);
    273    }
    274 
    275    // ---------------------------------------------------
    276    // Function registry
    277 
    278    bool MessageFormatter::isBuiltInSelector(const FunctionName& functionName) const {
    279        return standardMFFunctionRegistry.hasSelector(functionName);
    280    }
    281 
    282    bool MessageFormatter::isBuiltInFormatter(const FunctionName& functionName) const {
    283        return standardMFFunctionRegistry.hasFormatter(functionName);
    284    }
    285 
    286    // https://github.com/unicode-org/message-format-wg/issues/409
    287    // Unknown function = unknown function error
    288    // Formatter used as selector  = selector error
    289    // Selector used as formatter = formatting error
    290    const SelectorFactory* MessageFormatter::lookupSelectorFactory(MessageContext& context, const FunctionName& functionName, UErrorCode& status) const {
    291        DynamicErrors& err = context.getErrors();
    292 
    293        if (isBuiltInSelector(functionName)) {
    294            return standardMFFunctionRegistry.getSelector(functionName);
    295        }
    296        if (isBuiltInFormatter(functionName)) {
    297            err.setSelectorError(functionName, status);
    298            return nullptr;
    299        }
    300        if (hasCustomMFFunctionRegistry()) {
    301            const MFFunctionRegistry& customMFFunctionRegistry = getCustomMFFunctionRegistry();
    302            const SelectorFactory* selectorFactory = customMFFunctionRegistry.getSelector(functionName);
    303            if (selectorFactory != nullptr) {
    304                return selectorFactory;
    305            }
    306            if (customMFFunctionRegistry.getFormatter(functionName) != nullptr) {
    307                err.setSelectorError(functionName, status);
    308                return nullptr;
    309            }
    310        }
    311        // Either there is no custom function registry and the function
    312        // isn't built-in, or the function doesn't exist in either the built-in
    313        // or custom registry.
    314        // Unknown function error
    315        err.setUnknownFunction(functionName, status);
    316        return nullptr;
    317    }
    318 
    319    FormatterFactory* MessageFormatter::lookupFormatterFactory(const FunctionName& functionName,
    320                                                               UErrorCode& status) const {
    321        NULL_ON_ERROR(status);
    322 
    323        if (isBuiltInFormatter(functionName)) {
    324            return standardMFFunctionRegistry.getFormatter(functionName);
    325        }
    326        if (isBuiltInSelector(functionName)) {
    327            status = U_MF_FORMATTING_ERROR;
    328            return nullptr;
    329        }
    330        if (hasCustomMFFunctionRegistry()) {
    331            const MFFunctionRegistry& customMFFunctionRegistry = getCustomMFFunctionRegistry();
    332            FormatterFactory* formatterFactory = customMFFunctionRegistry.getFormatter(functionName);
    333            if (formatterFactory != nullptr) {
    334                return formatterFactory;
    335            }
    336            if (customMFFunctionRegistry.getSelector(functionName) != nullptr) {
    337                status = U_MF_FORMATTING_ERROR;
    338                return nullptr;
    339            }
    340        }
    341        // Either there is no custom function registry and the function
    342        // isn't built-in, or the function doesn't exist in either the built-in
    343        // or custom registry.
    344        // Unknown function error
    345        status = U_MF_UNKNOWN_FUNCTION_ERROR;
    346        return nullptr;
    347    }
    348 
    349    bool MessageFormatter::isCustomFormatter(const FunctionName& fn) const {
    350        return hasCustomMFFunctionRegistry() && getCustomMFFunctionRegistry().getFormatter(fn) != nullptr;
    351    }
    352 
    353 
    354    bool MessageFormatter::isCustomSelector(const FunctionName& fn) const {
    355        return hasCustomMFFunctionRegistry() && getCustomMFFunctionRegistry().getSelector(fn) != nullptr;
    356    }
    357 
    358 } // namespace message2
    359 
    360 U_NAMESPACE_END
    361 
    362 #endif /* #if !UCONFIG_NO_MF2 */
    363 
    364 #endif /* #if !UCONFIG_NO_FORMATTING */
    365 
    366 #endif /* #if !UCONFIG_NO_NORMALIZATION */