messageformat2_serializer.cpp (7897B)
1 // © 2024 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_NORMALIZATION 7 8 #if !UCONFIG_NO_FORMATTING 9 10 #if !UCONFIG_NO_MF2 11 12 #include "unicode/messageformat2_data_model.h" 13 #include "messageformat2_macros.h" 14 #include "messageformat2_serializer.h" 15 #include "uvector.h" // U_ASSERT 16 17 U_NAMESPACE_BEGIN 18 19 namespace message2 { 20 21 // Generates a string representation of a data model 22 // ------------------------------------------------ 23 24 using namespace data_model; 25 26 // Private helper methods 27 28 void Serializer::whitespace() { 29 result += SPACE; 30 } 31 32 void Serializer::emit(UChar32 c) { 33 result += c; 34 } 35 36 void Serializer::emit(const UnicodeString& s) { 37 result += s; 38 } 39 40 void Serializer::emit(const std::u16string_view& token) { 41 result.append(token); 42 } 43 44 void Serializer::emit(const Literal& l) { 45 if (l.isQuoted()) { 46 emit(PIPE); 47 } 48 const UnicodeString& contents = l.unquoted(); 49 for (int32_t i = 0; ((int32_t) i) < contents.length(); i++) { 50 // Re-escape any escaped-char characters 51 switch(contents[i]) { 52 case BACKSLASH: 53 case PIPE: 54 case LEFT_CURLY_BRACE: 55 case RIGHT_CURLY_BRACE: { 56 emit(BACKSLASH); 57 break; 58 } 59 default: { 60 break; 61 } 62 } 63 emit(contents[i]); 64 } 65 if (l.isQuoted()) { 66 emit(PIPE); 67 } 68 } 69 70 void Serializer::emit(const Key& k) { 71 if (k.isWildcard()) { 72 emit(ASTERISK); 73 return; 74 } 75 emit(k.asLiteral()); 76 } 77 78 void Serializer::emit(const SelectorKeys& k) { 79 const Key* ks = k.getKeysInternal(); 80 int32_t len = k.len; 81 // It would be an error for `keys` to be empty; 82 // that would mean this is the single `pattern` 83 // variant, and in that case, this method shouldn't be called 84 U_ASSERT(len > 0); 85 for (int32_t i = 0; i < len; i++) { 86 if (i != 0) { 87 whitespace(); 88 } 89 emit(ks[i]); 90 } 91 } 92 93 void Serializer::emit(const Operand& rand) { 94 U_ASSERT(!rand.isNull()); 95 96 if (rand.isVariable()) { 97 emit(DOLLAR); 98 emit(rand.asVariable()); 99 } else { 100 // Literal: quoted or unquoted 101 emit(rand.asLiteral()); 102 } 103 } 104 105 void Serializer::emit(const OptionMap& options) { 106 // Errors should have been checked before this point 107 UErrorCode localStatus = U_ZERO_ERROR; 108 U_ASSERT(!options.bogus); 109 for (int32_t i = 0; i < options.size(); i++) { 110 const Option& opt = options.getOption(i, localStatus); 111 // No need to check error code, since we already checked 112 // that !bogus 113 whitespace(); 114 emit(opt.getName()); 115 emit(EQUALS); 116 emit(opt.getValue()); 117 } 118 } 119 120 void Serializer::emitAttributes(const OptionMap& attributes) { 121 // Errors should have been checked before this point 122 UErrorCode localStatus = U_ZERO_ERROR; 123 U_ASSERT(!attributes.bogus); 124 for (int32_t i = 0; i < attributes.size(); i++) { 125 const Option& attr = attributes.getOption(i, localStatus); 126 // No need to check error code, since we already checked 127 // that !bogus 128 whitespace(); 129 emit(AT); 130 emit(attr.getName()); 131 const Operand& v = attr.getValue(); 132 if (!v.isNull()) { 133 emit(EQUALS); 134 emit(v); 135 } 136 } 137 } 138 139 void Serializer::emit(const Expression& expr) { 140 emit(LEFT_CURLY_BRACE); 141 142 if (!expr.isFunctionCall()) { 143 // Literal or variable, no annotation 144 emit(expr.getOperand()); 145 } else { 146 // Function call or reserved 147 if (!expr.isStandaloneAnnotation()) { 148 // Must be a function call that has an operand 149 emit(expr.getOperand()); 150 whitespace(); 151 } 152 UErrorCode localStatus = U_ZERO_ERROR; 153 const Operator* rator = expr.getOperator(localStatus); 154 U_ASSERT(U_SUCCESS(localStatus)); 155 emit(COLON); 156 emit(rator->getFunctionName()); 157 // No whitespace after function name, in case it has 158 // no options. (when there are options, emit(OptionMap) will 159 // emit the leading whitespace) 160 emit(rator->getOptionsInternal()); 161 } 162 emitAttributes(expr.getAttributesInternal()); 163 emit(RIGHT_CURLY_BRACE); 164 } 165 166 void Serializer::emit(const PatternPart& part) { 167 if (part.isText()) { 168 // Raw text 169 const UnicodeString& text = part.asText(); 170 // Re-escape '{'/'}'/'\''|' 171 for (int32_t i = 0; ((int32_t) i) < text.length(); i++) { 172 switch(text[i]) { 173 case PIPE: 174 case BACKSLASH: 175 case LEFT_CURLY_BRACE: 176 case RIGHT_CURLY_BRACE: { 177 emit(BACKSLASH); 178 break; 179 } 180 default: 181 break; 182 } 183 emit(text[i]); 184 } 185 return; 186 } 187 // Markup 188 if (part.isMarkup()) { 189 const Markup& markup = part.asMarkup(); 190 emit(LEFT_CURLY_BRACE); 191 if (markup.isClose()) { 192 emit(SLASH); 193 } else { 194 emit(NUMBER_SIGN); 195 } 196 emit(markup.getName()); 197 emit(markup.getOptionsInternal()); 198 emitAttributes(markup.getAttributesInternal()); 199 if (markup.isStandalone()) { 200 emit(SLASH); 201 } 202 emit(RIGHT_CURLY_BRACE); 203 return; 204 } 205 // Expression 206 emit(part.contents()); 207 } 208 209 void Serializer::emit(const Pattern& pat) { 210 int32_t len = pat.numParts(); 211 // Always quote pattern, which should match the normalized input 212 // if the parser is constructing it correctly 213 emit(LEFT_CURLY_BRACE); 214 emit(LEFT_CURLY_BRACE); 215 for (int32_t i = 0; i < len; i++) { 216 // No whitespace is needed here -- see the `pattern` nonterminal in the grammar 217 emit(pat.getPart(i)); 218 } 219 emit(RIGHT_CURLY_BRACE); 220 emit(RIGHT_CURLY_BRACE); 221 } 222 223 void Serializer::serializeDeclarations() { 224 const Binding* bindings = dataModel.getLocalVariablesInternal(); 225 U_ASSERT(dataModel.bindingsLen == 0 || bindings != nullptr); 226 227 for (int32_t i = 0; i < dataModel.bindingsLen; i++) { 228 const Binding& b = bindings[i]; 229 if (b.isLocal()) { 230 // No whitespace needed here -- see `message` in the grammar 231 emit(ID_LOCAL); 232 whitespace(); 233 emit(DOLLAR); 234 emit(b.getVariable()); 235 // No whitespace needed here -- see `local-declaration` in the grammar 236 emit(EQUALS); 237 // No whitespace needed here -- see `local-declaration` in the grammar 238 } else { 239 // Input declaration 240 emit(ID_INPUT); 241 // No whitespace needed here -- see `input-declaration` in the grammar 242 } 243 emit(b.getValue()); 244 } 245 } 246 247 void Serializer::serializeSelectors() { 248 U_ASSERT(!dataModel.hasPattern()); 249 const VariableName* selectors = dataModel.getSelectorsInternal(); 250 251 emit(ID_MATCH); 252 for (int32_t i = 0; i < dataModel.numSelectors(); i++) { 253 whitespace(); 254 emit(DOLLAR); 255 emit(selectors[i]); 256 } 257 } 258 259 void Serializer::serializeVariants() { 260 U_ASSERT(!dataModel.hasPattern()); 261 const Variant* variants = dataModel.getVariantsInternal(); 262 whitespace(); 263 for (int32_t i = 0; i < dataModel.numVariants(); i++) { 264 const Variant& v = variants[i]; 265 emit(v.getKeys()); 266 // No whitespace needed here -- see `variant` in the grammar 267 emit(v.getPattern()); 268 } 269 } 270 271 272 // Main (public) serializer method 273 void Serializer::serialize() { 274 serializeDeclarations(); 275 // Pattern message 276 if (dataModel.hasPattern()) { 277 emit(dataModel.getPattern()); 278 } else { 279 // Selectors message 280 serializeSelectors(); 281 serializeVariants(); 282 } 283 } 284 285 } // namespace message2 286 U_NAMESPACE_END 287 288 #endif /* #if !UCONFIG_NO_MF2 */ 289 290 #endif /* #if !UCONFIG_NO_FORMATTING */ 291 292 #endif /* #if !UCONFIG_NO_NORMALIZATION */