tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

messageformat2_serializer.cpp (7897B)


      1 // © 2024 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #if !UCONFIG_NO_NORMALIZATION
      7 
      8 #if !UCONFIG_NO_FORMATTING
      9 
     10 #if !UCONFIG_NO_MF2
     11 
     12 #include "unicode/messageformat2_data_model.h"
     13 #include "messageformat2_macros.h"
     14 #include "messageformat2_serializer.h"
     15 #include "uvector.h" // U_ASSERT
     16 
     17 U_NAMESPACE_BEGIN
     18 
     19 namespace message2 {
     20 
     21 // Generates a string representation of a data model
     22 // ------------------------------------------------
     23 
     24 using namespace data_model;
     25 
     26 // Private helper methods
     27 
     28 void Serializer::whitespace() {
     29    result += SPACE;
     30 }
     31 
     32 void Serializer::emit(UChar32 c) {
     33    result += c;
     34 }
     35 
     36 void Serializer::emit(const UnicodeString& s) {
     37    result += s;
     38 }
     39 
     40 void Serializer::emit(const std::u16string_view& token) {
     41    result.append(token);
     42 }
     43 
     44 void Serializer::emit(const Literal& l) {
     45    if (l.isQuoted()) {
     46      emit(PIPE);
     47    }
     48    const UnicodeString& contents = l.unquoted();
     49    for (int32_t i = 0; ((int32_t) i) < contents.length(); i++) {
     50        // Re-escape any escaped-char characters
     51        switch(contents[i]) {
     52        case BACKSLASH:
     53        case PIPE:
     54        case LEFT_CURLY_BRACE:
     55        case RIGHT_CURLY_BRACE: {
     56            emit(BACKSLASH);
     57            break;
     58        }
     59        default: {
     60            break;
     61        }
     62        }
     63        emit(contents[i]);
     64    }
     65    if (l.isQuoted()) {
     66        emit(PIPE);
     67    }
     68 }
     69 
     70 void Serializer::emit(const Key& k) {
     71    if (k.isWildcard()) {
     72        emit(ASTERISK);
     73        return;
     74    }
     75    emit(k.asLiteral());
     76 }
     77 
     78 void Serializer::emit(const SelectorKeys& k) {
     79  const Key* ks = k.getKeysInternal();
     80  int32_t len = k.len;
     81  // It would be an error for `keys` to be empty;
     82  // that would mean this is the single `pattern`
     83  // variant, and in that case, this method shouldn't be called
     84  U_ASSERT(len > 0);
     85  for (int32_t i = 0; i < len; i++) {
     86    if (i != 0) {
     87      whitespace();
     88    }
     89    emit(ks[i]);
     90  }
     91 }
     92 
     93 void Serializer::emit(const Operand& rand) {
     94    U_ASSERT(!rand.isNull());
     95 
     96    if (rand.isVariable()) {
     97        emit(DOLLAR);
     98        emit(rand.asVariable());
     99    } else {
    100        // Literal: quoted or unquoted
    101        emit(rand.asLiteral());
    102    }
    103 }
    104 
    105 void Serializer::emit(const OptionMap& options) {
    106    // Errors should have been checked before this point
    107    UErrorCode localStatus = U_ZERO_ERROR;
    108    U_ASSERT(!options.bogus);
    109    for (int32_t i = 0; i < options.size(); i++) {
    110        const Option& opt = options.getOption(i, localStatus);
    111        // No need to check error code, since we already checked
    112        // that !bogus
    113        whitespace();
    114        emit(opt.getName());
    115        emit(EQUALS);
    116        emit(opt.getValue());
    117    }
    118 }
    119 
    120 void Serializer::emitAttributes(const OptionMap& attributes) {
    121    // Errors should have been checked before this point
    122    UErrorCode localStatus = U_ZERO_ERROR;
    123    U_ASSERT(!attributes.bogus);
    124    for (int32_t i = 0; i < attributes.size(); i++) {
    125        const Option& attr = attributes.getOption(i, localStatus);
    126        // No need to check error code, since we already checked
    127        // that !bogus
    128        whitespace();
    129        emit(AT);
    130        emit(attr.getName());
    131        const Operand& v = attr.getValue();
    132        if (!v.isNull()) {
    133            emit(EQUALS);
    134            emit(v);
    135        }
    136    }
    137 }
    138 
    139 void Serializer::emit(const Expression& expr) {
    140    emit(LEFT_CURLY_BRACE);
    141 
    142    if (!expr.isFunctionCall()) {
    143        // Literal or variable, no annotation
    144        emit(expr.getOperand());
    145    } else {
    146        // Function call or reserved
    147        if (!expr.isStandaloneAnnotation()) {
    148          // Must be a function call that has an operand
    149          emit(expr.getOperand());
    150          whitespace();
    151        }
    152        UErrorCode localStatus = U_ZERO_ERROR;
    153        const Operator* rator = expr.getOperator(localStatus);
    154        U_ASSERT(U_SUCCESS(localStatus));
    155        emit(COLON);
    156        emit(rator->getFunctionName());
    157        // No whitespace after function name, in case it has
    158        // no options. (when there are options, emit(OptionMap) will
    159        // emit the leading whitespace)
    160        emit(rator->getOptionsInternal());
    161    }
    162    emitAttributes(expr.getAttributesInternal());
    163    emit(RIGHT_CURLY_BRACE);
    164 }
    165 
    166 void Serializer::emit(const PatternPart& part) {
    167    if (part.isText()) {
    168        // Raw text
    169        const UnicodeString& text = part.asText();
    170        // Re-escape '{'/'}'/'\''|'
    171        for (int32_t i = 0; ((int32_t) i) < text.length(); i++) {
    172          switch(text[i]) {
    173          case PIPE:
    174          case BACKSLASH:
    175          case LEFT_CURLY_BRACE:
    176          case RIGHT_CURLY_BRACE: {
    177            emit(BACKSLASH);
    178            break;
    179          }
    180          default:
    181            break;
    182          }
    183          emit(text[i]);
    184        }
    185        return;
    186    }
    187    // Markup
    188    if (part.isMarkup()) {
    189        const Markup& markup = part.asMarkup();
    190        emit(LEFT_CURLY_BRACE);
    191        if (markup.isClose()) {
    192            emit(SLASH);
    193            } else {
    194            emit(NUMBER_SIGN);
    195        }
    196        emit(markup.getName());
    197        emit(markup.getOptionsInternal());
    198        emitAttributes(markup.getAttributesInternal());
    199        if (markup.isStandalone()) {
    200            emit(SLASH);
    201        }
    202        emit(RIGHT_CURLY_BRACE);
    203        return;
    204    }
    205    // Expression
    206    emit(part.contents());
    207 }
    208 
    209 void Serializer::emit(const Pattern& pat) {
    210    int32_t len = pat.numParts();
    211    // Always quote pattern, which should match the normalized input
    212    // if the parser is constructing it correctly
    213    emit(LEFT_CURLY_BRACE);
    214    emit(LEFT_CURLY_BRACE);
    215    for (int32_t i = 0; i < len; i++) {
    216        // No whitespace is needed here -- see the `pattern` nonterminal in the grammar
    217        emit(pat.getPart(i));
    218    }
    219    emit(RIGHT_CURLY_BRACE);
    220    emit(RIGHT_CURLY_BRACE);
    221 }
    222 
    223 void Serializer::serializeDeclarations() {
    224    const Binding* bindings = dataModel.getLocalVariablesInternal();
    225    U_ASSERT(dataModel.bindingsLen == 0 || bindings != nullptr);
    226 
    227    for (int32_t i = 0; i < dataModel.bindingsLen; i++) {
    228        const Binding& b = bindings[i];
    229        if (b.isLocal()) {
    230            // No whitespace needed here -- see `message` in the grammar
    231            emit(ID_LOCAL);
    232            whitespace();
    233            emit(DOLLAR);
    234            emit(b.getVariable());
    235            // No whitespace needed here -- see `local-declaration` in the grammar
    236            emit(EQUALS);
    237            // No whitespace needed here -- see `local-declaration` in the grammar
    238        } else {
    239            // Input declaration
    240            emit(ID_INPUT);
    241            // No whitespace needed here -- see `input-declaration` in the grammar
    242        }
    243        emit(b.getValue());
    244    }
    245 }
    246 
    247 void Serializer::serializeSelectors() {
    248    U_ASSERT(!dataModel.hasPattern());
    249    const VariableName* selectors = dataModel.getSelectorsInternal();
    250 
    251    emit(ID_MATCH);
    252    for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
    253        whitespace();
    254        emit(DOLLAR);
    255        emit(selectors[i]);
    256    }
    257 }
    258 
    259 void Serializer::serializeVariants() {
    260    U_ASSERT(!dataModel.hasPattern());
    261    const Variant* variants = dataModel.getVariantsInternal();
    262    whitespace();
    263    for (int32_t i = 0; i < dataModel.numVariants(); i++) {
    264        const Variant& v = variants[i];
    265        emit(v.getKeys());
    266        // No whitespace needed here -- see `variant` in the grammar
    267        emit(v.getPattern());
    268    }
    269 }
    270 
    271 
    272 // Main (public) serializer method
    273 void Serializer::serialize() {
    274    serializeDeclarations();
    275    // Pattern message
    276    if (dataModel.hasPattern()) {
    277      emit(dataModel.getPattern());
    278    } else {
    279      // Selectors message
    280      serializeSelectors();
    281      serializeVariants();
    282    }
    283 }
    284 
    285 } // namespace message2
    286 U_NAMESPACE_END
    287 
    288 #endif /* #if !UCONFIG_NO_MF2 */
    289 
    290 #endif /* #if !UCONFIG_NO_FORMATTING */
    291 
    292 #endif /* #if !UCONFIG_NO_NORMALIZATION */