tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

messageformat2_checker.cpp (10826B)


      1 // © 2024 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #if !UCONFIG_NO_NORMALIZATION
      7 
      8 #if !UCONFIG_NO_FORMATTING
      9 
     10 #if !UCONFIG_NO_MF2
     11 
     12 #include "unicode/messageformat2.h"
     13 #include "messageformat2_allocation.h"
     14 #include "messageformat2_checker.h"
     15 #include "messageformat2_evaluation.h"
     16 #include "messageformat2_function_registry_internal.h"
     17 #include "messageformat2_macros.h"
     18 #include "uvector.h" // U_ASSERT
     19 
     20 U_NAMESPACE_BEGIN
     21 
     22 namespace message2 {
     23 
     24 /*
     25 Checks data model errors
     26 (see https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#error-handling )
     27 
     28 The following are checked here:
     29 Variant Key Mismatch
     30 Duplicate Variant
     31 Missing Fallback Variant (called NonexhaustivePattern here)
     32 Missing Selector Annotation
     33 Duplicate Declaration
     34  - Most duplicate declaration errors are checked by the parser,
     35    but the checker checks for declarations of input variables
     36    that were previously implicitly declared
     37 (Duplicate option names and most duplicate declarations are checked by the parser)
     38 */
     39 
     40 // Type environments
     41 // -----------------
     42 
     43 TypeEnvironment::TypeEnvironment(UErrorCode& status) {
     44    CHECK_ERROR(status);
     45 
     46    UVector* temp;
     47    temp = createStringVectorNoAdopt(status);
     48    CHECK_ERROR(status);
     49    annotated.adoptInstead(temp);
     50    temp = createStringVectorNoAdopt(status);
     51    CHECK_ERROR(status);
     52    unannotated.adoptInstead(temp);
     53    temp = createStringVectorNoAdopt(status);
     54    CHECK_ERROR(status);
     55    freeVars.adoptInstead(temp);
     56 }
     57 
     58 static bool has(const UVector& v, const VariableName& var) {
     59     return v.contains(const_cast<void*>(static_cast<const void*>(&var)));
     60 }
     61 
     62 // Returns true if `var` was either previously used (implicit declaration),
     63 // or is in scope by an explicit declaration
     64 bool TypeEnvironment::known(const VariableName& var) const {
     65    return has(*annotated, var) || has(*unannotated, var) || has(*freeVars, var);
     66 }
     67 
     68 TypeEnvironment::Type TypeEnvironment::get(const VariableName& var) const {
     69    U_ASSERT(annotated.isValid());
     70    if (has(*annotated, var)) {
     71        return Annotated;
     72    }
     73    U_ASSERT(unannotated.isValid());
     74    if (has(*unannotated, var)) {
     75        return Unannotated;
     76    }
     77    U_ASSERT(freeVars.isValid());
     78    if (has(*freeVars, var)) {
     79        return FreeVariable;
     80    }
     81    // This case is a "free variable without an implicit declaration",
     82    // i.e. one used only in a selector expression and not in a declaration RHS
     83    return Unannotated;
     84 }
     85 
     86 void TypeEnvironment::extend(const VariableName& var, TypeEnvironment::Type t, UErrorCode& status) {
     87    if (t == Unannotated) {
     88        U_ASSERT(unannotated.isValid());
     89        // See comment below
     90        unannotated->addElement(const_cast<void*>(static_cast<const void*>(&var)), status);
     91        return;
     92    }
     93 
     94    if (t == FreeVariable) {
     95        U_ASSERT(freeVars.isValid());
     96        // See comment below
     97        freeVars->addElement(const_cast<void*>(static_cast<const void*>(&var)), status);
     98        return;
     99    }
    100 
    101    U_ASSERT(annotated.isValid());
    102    // This is safe because elements of `annotated` are never written
    103    // and the lifetime of `var` is guaranteed to include the lifetime of
    104    // `annotated`
    105    annotated->addElement(const_cast<void*>(static_cast<const void*>(&var)), status);
    106 }
    107 
    108 TypeEnvironment::~TypeEnvironment() {}
    109 
    110 // ---------------------
    111 
    112 Key Checker::normalizeNFC(const Key& k) const {
    113    if (k.isWildcard()) {
    114        return k;
    115    }
    116    return Key(Literal(k.asLiteral().isQuoted(),
    117                       StandardFunctions::normalizeNFC(k.asLiteral().unquoted())));
    118 }
    119 
    120 static bool areDefaultKeys(const Key* keys, int32_t len) {
    121    U_ASSERT(len > 0);
    122    for (int32_t i = 0; i < len; i++) {
    123        if (!keys[i].isWildcard()) {
    124            return false;
    125        }
    126    }
    127    return true;
    128 }
    129 
    130 void Checker::addFreeVars(TypeEnvironment& t, const Operand& rand, UErrorCode& status) {
    131    CHECK_ERROR(status);
    132 
    133    if (rand.isVariable()) {
    134        const VariableName& v = rand.asVariable();
    135        if (!t.known(v)) {
    136            t.extend(v, TypeEnvironment::Type::FreeVariable, status);
    137        }
    138    }
    139 }
    140 
    141 void Checker::addFreeVars(TypeEnvironment& t, const OptionMap& opts, UErrorCode& status) {
    142    for (int32_t i = 0; i < opts.size(); i++) {
    143        const Option& o = opts.getOption(i, status);
    144        CHECK_ERROR(status);
    145        addFreeVars(t, o.getValue(), status);
    146    }
    147 }
    148 
    149 void Checker::addFreeVars(TypeEnvironment& t, const Operator& rator, UErrorCode& status) {
    150    CHECK_ERROR(status);
    151 
    152    addFreeVars(t, rator.getOptionsInternal(), status);
    153 }
    154 
    155 void Checker::addFreeVars(TypeEnvironment& t, const Expression& rhs, UErrorCode& status) {
    156    CHECK_ERROR(status);
    157 
    158    if (rhs.isFunctionCall()) {
    159        const Operator* rator = rhs.getOperator(status);
    160        U_ASSERT(U_SUCCESS(status));
    161        addFreeVars(t, *rator, status);
    162    }
    163    addFreeVars(t, rhs.getOperand(), status);
    164 }
    165 
    166 void Checker::checkVariants(UErrorCode& status) {
    167    CHECK_ERROR(status);
    168 
    169    U_ASSERT(!dataModel.hasPattern());
    170 
    171    // Check that each variant has a key list with size
    172    // equal to the number of selectors
    173    const Variant* variants = dataModel.getVariantsInternal();
    174 
    175    // Check that one variant includes only wildcards
    176    bool defaultExists = false;
    177    bool duplicatesExist = false;
    178 
    179    for (int32_t i = 0; i < dataModel.numVariants(); i++) {
    180        const SelectorKeys& k = variants[i].getKeys();
    181        const Key* keys = k.getKeysInternal();
    182        int32_t len = k.len;
    183        if (len != dataModel.numSelectors()) {
    184            // Variant key mismatch
    185            errors.addError(StaticErrorType::VariantKeyMismatchError, status);
    186            return;
    187        }
    188        defaultExists |= areDefaultKeys(keys, len);
    189 
    190        // Check if this variant's keys are duplicated by any other variant's keys
    191        if (!duplicatesExist) {
    192            // This check takes quadratic time, but it can be optimized if checking
    193            // this property turns out to be a bottleneck.
    194            for (int32_t j = 0; j < i; j++) {
    195                const SelectorKeys& k1 = variants[j].getKeys();
    196                const Key* keys1 = k1.getKeysInternal();
    197                bool allEqual = true;
    198                // This variant was already checked,
    199                // so we know keys1.len == len
    200                for (int32_t kk = 0; kk < len; kk++) {
    201                    if (!(normalizeNFC(keys[kk]) == normalizeNFC(keys1[kk]))) {
    202                        allEqual = false;
    203                        break;
    204                    }
    205                }
    206                if (allEqual) {
    207                    duplicatesExist = true;
    208                }
    209            }
    210        }
    211    }
    212 
    213    if (duplicatesExist) {
    214        errors.addError(StaticErrorType::DuplicateVariant, status);
    215    }
    216    if (!defaultExists) {
    217        errors.addError(StaticErrorType::NonexhaustivePattern, status);
    218    }
    219 }
    220 
    221 void Checker::requireAnnotated(const TypeEnvironment& t,
    222                               const VariableName& selectorVar,
    223                               UErrorCode& status) {
    224    CHECK_ERROR(status);
    225 
    226    if (t.get(selectorVar) == TypeEnvironment::Type::Annotated) {
    227        return; // No error
    228    }
    229    // If this code is reached, an error was detected
    230    errors.addError(StaticErrorType::MissingSelectorAnnotation, status);
    231 }
    232 
    233 void Checker::checkSelectors(const TypeEnvironment& t, UErrorCode& status) {
    234    U_ASSERT(!dataModel.hasPattern());
    235 
    236    // Check each selector; if it's not annotated, emit a
    237    // "missing selector annotation" error
    238    const VariableName* selectors = dataModel.getSelectorsInternal();
    239    for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
    240        requireAnnotated(t, selectors[i], status);
    241    }
    242 }
    243 
    244 TypeEnvironment::Type typeOf(TypeEnvironment& t, const Expression& expr) {
    245    if (expr.isFunctionCall()) {
    246        return TypeEnvironment::Type::Annotated;
    247    }
    248    const Operand& rand = expr.getOperand();
    249    U_ASSERT(!rand.isNull());
    250    if (rand.isLiteral()) {
    251        return TypeEnvironment::Type::Unannotated;
    252    }
    253    U_ASSERT(rand.isVariable());
    254    return t.get(rand.asVariable());
    255 }
    256 
    257 void Checker::checkDeclarations(TypeEnvironment& t, UErrorCode& status) {
    258    CHECK_ERROR(status);
    259 
    260    // For each declaration, extend the type environment with its type
    261    // Only a very simple type system is necessary: variables
    262    // have the type "annotated", "unannotated", or "free".
    263    // For "missing selector annotation" checking, free variables
    264    // (message arguments) are treated as unannotated.
    265    // Free variables are also used for checking duplicate declarations.
    266    const Binding* env = dataModel.getLocalVariablesInternal();
    267    for (int32_t i = 0; i < dataModel.bindingsLen; i++) {
    268        const Binding& b = env[i];
    269        const VariableName& lhs = b.getVariable();
    270        const Expression& rhs = b.getValue();
    271 
    272        // First, add free variables from the RHS of b
    273        // This must be done first so we can catch:
    274        // .local $foo = {$foo}
    275        // (where the RHS is the first use of $foo)
    276        if (b.isLocal()) {
    277            addFreeVars(t, rhs, status);
    278 
    279            // Next, check if the LHS equals any free variables
    280            // whose implicit declarations are in scope
    281            if (t.known(lhs) && t.get(lhs) == TypeEnvironment::Type::FreeVariable) {
    282                errors.addError(StaticErrorType::DuplicateDeclarationError, status);
    283            }
    284        } else {
    285            // Input declaration; if b has no annotation, there's nothing to check
    286            if (!b.isLocal() && b.hasAnnotation()) {
    287                const OptionMap& opts = b.getOptionsInternal();
    288                // For .input declarations, we just need to add any variables
    289                // referenced in the options
    290                addFreeVars(t, opts, status);
    291             }
    292            // Next, check if the LHS equals any free variables
    293            // whose implicit declarations are in scope
    294            if (t.known(lhs) && t.get(lhs) == TypeEnvironment::Type::FreeVariable) {
    295                errors.addError(StaticErrorType::DuplicateDeclarationError, status);
    296            }
    297        }
    298        // Next, extend the type environment with a binding from lhs to its type
    299        t.extend(lhs, typeOf(t, rhs), status);
    300    }
    301 }
    302 
    303 void Checker::check(UErrorCode& status) {
    304    CHECK_ERROR(status);
    305 
    306    TypeEnvironment typeEnv(status);
    307    checkDeclarations(typeEnv, status);
    308    // Pattern message
    309    if (dataModel.hasPattern()) {
    310        return;
    311    } else {
    312      // Selectors message
    313      checkSelectors(typeEnv, status);
    314      checkVariants(status);
    315    }
    316 }
    317 
    318 } // namespace message2
    319 U_NAMESPACE_END
    320 
    321 #endif /* #if !UCONFIG_NO_MF2 */
    322 
    323 #endif /* #if !UCONFIG_NO_FORMATTING */
    324 
    325 #endif /* #if !UCONFIG_NO_NORMALIZATION */