messageformat2_checker.cpp (10826B)
1 // © 2024 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_NORMALIZATION 7 8 #if !UCONFIG_NO_FORMATTING 9 10 #if !UCONFIG_NO_MF2 11 12 #include "unicode/messageformat2.h" 13 #include "messageformat2_allocation.h" 14 #include "messageformat2_checker.h" 15 #include "messageformat2_evaluation.h" 16 #include "messageformat2_function_registry_internal.h" 17 #include "messageformat2_macros.h" 18 #include "uvector.h" // U_ASSERT 19 20 U_NAMESPACE_BEGIN 21 22 namespace message2 { 23 24 /* 25 Checks data model errors 26 (see https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#error-handling ) 27 28 The following are checked here: 29 Variant Key Mismatch 30 Duplicate Variant 31 Missing Fallback Variant (called NonexhaustivePattern here) 32 Missing Selector Annotation 33 Duplicate Declaration 34 - Most duplicate declaration errors are checked by the parser, 35 but the checker checks for declarations of input variables 36 that were previously implicitly declared 37 (Duplicate option names and duplicate declarations are checked by the parser) 38 */ 39 40 // Type environments 41 // ----------------- 42 43 TypeEnvironment::TypeEnvironment(UErrorCode& status) { 44 CHECK_ERROR(status); 45 46 UVector* temp; 47 temp = createStringVectorNoAdopt(status); 48 CHECK_ERROR(status); 49 annotated.adoptInstead(temp); 50 temp = createStringVectorNoAdopt(status); 51 CHECK_ERROR(status); 52 unannotated.adoptInstead(temp); 53 temp = createStringVectorNoAdopt(status); 54 CHECK_ERROR(status); 55 freeVars.adoptInstead(temp); 56 } 57 58 static bool has(const UVector& v, const VariableName& var) { 59 return v.contains(const_cast<void*>(static_cast<const void*>(&var))); 60 } 61 62 // Returns true if `var` was either previously used (implicit declaration), 63 // or is in scope by an explicit declaration 64 bool TypeEnvironment::known(const VariableName& var) const { 65 return has(*annotated, var) || has(*unannotated, var) || has(*freeVars, var); 66 } 67 68 TypeEnvironment::Type TypeEnvironment::get(const VariableName& var) const { 69 U_ASSERT(annotated.isValid()); 70 if (has(*annotated, var)) { 71 return Annotated; 72 } 73 U_ASSERT(unannotated.isValid()); 74 if (has(*unannotated, var)) { 75 return Unannotated; 76 } 77 U_ASSERT(freeVars.isValid()); 78 if (has(*freeVars, var)) { 79 return FreeVariable; 80 } 81 // This case is a "free variable without an implicit declaration", 82 // i.e. one used only in a selector expression and not in a declaration RHS 83 return Unannotated; 84 } 85 86 void TypeEnvironment::extend(const VariableName& var, TypeEnvironment::Type t, UErrorCode& status) { 87 if (t == Unannotated) { 88 U_ASSERT(unannotated.isValid()); 89 // See comment below 90 unannotated->addElement(const_cast<void*>(static_cast<const void*>(&var)), status); 91 return; 92 } 93 94 if (t == FreeVariable) { 95 U_ASSERT(freeVars.isValid()); 96 // See comment below 97 freeVars->addElement(const_cast<void*>(static_cast<const void*>(&var)), status); 98 return; 99 } 100 101 U_ASSERT(annotated.isValid()); 102 // This is safe because elements of `annotated` are never written 103 // and the lifetime of `var` is guaranteed to include the lifetime of 104 // `annotated` 105 annotated->addElement(const_cast<void*>(static_cast<const void*>(&var)), status); 106 } 107 108 TypeEnvironment::~TypeEnvironment() {} 109 110 // --------------------- 111 112 Key Checker::normalizeNFC(const Key& k) const { 113 if (k.isWildcard()) { 114 return k; 115 } 116 return Key(Literal(k.asLiteral().isQuoted(), 117 StandardFunctions::normalizeNFC(k.asLiteral().unquoted()))); 118 } 119 120 static bool areDefaultKeys(const Key* keys, int32_t len) { 121 U_ASSERT(len > 0); 122 for (int32_t i = 0; i < len; i++) { 123 if (!keys[i].isWildcard()) { 124 return false; 125 } 126 } 127 return true; 128 } 129 130 void Checker::addFreeVars(TypeEnvironment& t, const Operand& rand, UErrorCode& status) { 131 CHECK_ERROR(status); 132 133 if (rand.isVariable()) { 134 const VariableName& v = rand.asVariable(); 135 if (!t.known(v)) { 136 t.extend(v, TypeEnvironment::Type::FreeVariable, status); 137 } 138 } 139 } 140 141 void Checker::addFreeVars(TypeEnvironment& t, const OptionMap& opts, UErrorCode& status) { 142 for (int32_t i = 0; i < opts.size(); i++) { 143 const Option& o = opts.getOption(i, status); 144 CHECK_ERROR(status); 145 addFreeVars(t, o.getValue(), status); 146 } 147 } 148 149 void Checker::addFreeVars(TypeEnvironment& t, const Operator& rator, UErrorCode& status) { 150 CHECK_ERROR(status); 151 152 addFreeVars(t, rator.getOptionsInternal(), status); 153 } 154 155 void Checker::addFreeVars(TypeEnvironment& t, const Expression& rhs, UErrorCode& status) { 156 CHECK_ERROR(status); 157 158 if (rhs.isFunctionCall()) { 159 const Operator* rator = rhs.getOperator(status); 160 U_ASSERT(U_SUCCESS(status)); 161 addFreeVars(t, *rator, status); 162 } 163 addFreeVars(t, rhs.getOperand(), status); 164 } 165 166 void Checker::checkVariants(UErrorCode& status) { 167 CHECK_ERROR(status); 168 169 U_ASSERT(!dataModel.hasPattern()); 170 171 // Check that each variant has a key list with size 172 // equal to the number of selectors 173 const Variant* variants = dataModel.getVariantsInternal(); 174 175 // Check that one variant includes only wildcards 176 bool defaultExists = false; 177 bool duplicatesExist = false; 178 179 for (int32_t i = 0; i < dataModel.numVariants(); i++) { 180 const SelectorKeys& k = variants[i].getKeys(); 181 const Key* keys = k.getKeysInternal(); 182 int32_t len = k.len; 183 if (len != dataModel.numSelectors()) { 184 // Variant key mismatch 185 errors.addError(StaticErrorType::VariantKeyMismatchError, status); 186 return; 187 } 188 defaultExists |= areDefaultKeys(keys, len); 189 190 // Check if this variant's keys are duplicated by any other variant's keys 191 if (!duplicatesExist) { 192 // This check takes quadratic time, but it can be optimized if checking 193 // this property turns out to be a bottleneck. 194 for (int32_t j = 0; j < i; j++) { 195 const SelectorKeys& k1 = variants[j].getKeys(); 196 const Key* keys1 = k1.getKeysInternal(); 197 bool allEqual = true; 198 // This variant was already checked, 199 // so we know keys1.len == len 200 for (int32_t kk = 0; kk < len; kk++) { 201 if (!(normalizeNFC(keys[kk]) == normalizeNFC(keys1[kk]))) { 202 allEqual = false; 203 break; 204 } 205 } 206 if (allEqual) { 207 duplicatesExist = true; 208 } 209 } 210 } 211 } 212 213 if (duplicatesExist) { 214 errors.addError(StaticErrorType::DuplicateVariant, status); 215 } 216 if (!defaultExists) { 217 errors.addError(StaticErrorType::NonexhaustivePattern, status); 218 } 219 } 220 221 void Checker::requireAnnotated(const TypeEnvironment& t, 222 const VariableName& selectorVar, 223 UErrorCode& status) { 224 CHECK_ERROR(status); 225 226 if (t.get(selectorVar) == TypeEnvironment::Type::Annotated) { 227 return; // No error 228 } 229 // If this code is reached, an error was detected 230 errors.addError(StaticErrorType::MissingSelectorAnnotation, status); 231 } 232 233 void Checker::checkSelectors(const TypeEnvironment& t, UErrorCode& status) { 234 U_ASSERT(!dataModel.hasPattern()); 235 236 // Check each selector; if it's not annotated, emit a 237 // "missing selector annotation" error 238 const VariableName* selectors = dataModel.getSelectorsInternal(); 239 for (int32_t i = 0; i < dataModel.numSelectors(); i++) { 240 requireAnnotated(t, selectors[i], status); 241 } 242 } 243 244 TypeEnvironment::Type typeOf(TypeEnvironment& t, const Expression& expr) { 245 if (expr.isFunctionCall()) { 246 return TypeEnvironment::Type::Annotated; 247 } 248 const Operand& rand = expr.getOperand(); 249 U_ASSERT(!rand.isNull()); 250 if (rand.isLiteral()) { 251 return TypeEnvironment::Type::Unannotated; 252 } 253 U_ASSERT(rand.isVariable()); 254 return t.get(rand.asVariable()); 255 } 256 257 void Checker::checkDeclarations(TypeEnvironment& t, UErrorCode& status) { 258 CHECK_ERROR(status); 259 260 // For each declaration, extend the type environment with its type 261 // Only a very simple type system is necessary: variables 262 // have the type "annotated", "unannotated", or "free". 263 // For "missing selector annotation" checking, free variables 264 // (message arguments) are treated as unannotated. 265 // Free variables are also used for checking duplicate declarations. 266 const Binding* env = dataModel.getLocalVariablesInternal(); 267 for (int32_t i = 0; i < dataModel.bindingsLen; i++) { 268 const Binding& b = env[i]; 269 const VariableName& lhs = b.getVariable(); 270 const Expression& rhs = b.getValue(); 271 272 // First, add free variables from the RHS of b 273 // This must be done first so we can catch: 274 // .local $foo = {$foo} 275 // (where the RHS is the first use of $foo) 276 if (b.isLocal()) { 277 addFreeVars(t, rhs, status); 278 279 // Next, check if the LHS equals any free variables 280 // whose implicit declarations are in scope 281 if (t.known(lhs) && t.get(lhs) == TypeEnvironment::Type::FreeVariable) { 282 errors.addError(StaticErrorType::DuplicateDeclarationError, status); 283 } 284 } else { 285 // Input declaration; if b has no annotation, there's nothing to check 286 if (!b.isLocal() && b.hasAnnotation()) { 287 const OptionMap& opts = b.getOptionsInternal(); 288 // For .input declarations, we just need to add any variables 289 // referenced in the options 290 addFreeVars(t, opts, status); 291 } 292 // Next, check if the LHS equals any free variables 293 // whose implicit declarations are in scope 294 if (t.known(lhs) && t.get(lhs) == TypeEnvironment::Type::FreeVariable) { 295 errors.addError(StaticErrorType::DuplicateDeclarationError, status); 296 } 297 } 298 // Next, extend the type environment with a binding from lhs to its type 299 t.extend(lhs, typeOf(t, rhs), status); 300 } 301 } 302 303 void Checker::check(UErrorCode& status) { 304 CHECK_ERROR(status); 305 306 TypeEnvironment typeEnv(status); 307 checkDeclarations(typeEnv, status); 308 // Pattern message 309 if (dataModel.hasPattern()) { 310 return; 311 } else { 312 // Selectors message 313 checkSelectors(typeEnv, status); 314 checkVariants(status); 315 } 316 } 317 318 } // namespace message2 319 U_NAMESPACE_END 320 321 #endif /* #if !UCONFIG_NO_MF2 */ 322 323 #endif /* #if !UCONFIG_NO_FORMATTING */ 324 325 #endif /* #if !UCONFIG_NO_NORMALIZATION */