tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

messageformat2_parser.h (8780B)


      1 // © 2024 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #ifndef U_HIDE_DEPRECATED_API
      7 
      8 #ifndef MESSAGEFORMAT_PARSER_H
      9 #define MESSAGEFORMAT_PARSER_H
     10 
     11 #include "unicode/messageformat2_data_model.h"
     12 #include "unicode/parseerr.h"
     13 #include "unicode/uniset.h"
     14 
     15 #include "messageformat2_allocation.h"
     16 #include "messageformat2_errors.h"
     17 
     18 #if U_SHOW_CPLUSPLUS_API
     19 
     20 #if !UCONFIG_NO_NORMALIZATION
     21 
     22 #if !UCONFIG_NO_FORMATTING
     23 
     24 #if !UCONFIG_NO_MF2
     25 
     26 U_NAMESPACE_BEGIN
     27 
     28 namespace message2 {
     29 
     30    using namespace data_model;
     31 
     32    // Used for parameterizing options parsing code
     33    // over the two builders that use it (Operator and Markup)
     34    template <class T>
     35    class OptionAdder {
     36        private:
     37            T& builder;
     38        public:
     39            OptionAdder(T& b) : builder(b) {}
     40            void addOption(const UnicodeString& k, Operand&& r, UErrorCode& s) {
     41                builder.addOption(k, std::move(r), s);
     42            }
     43    };
     44 
     45    // Used for parameterizing attributes parsing code
     46    // over the two builders that use it (Expression and Markup)
     47    // Unfortunately the same OptionAdder class can't just be reused,
     48    // becaues duplicate options are forbidden while duplicate attributes are not
     49    template <class T>
     50    class AttributeAdder {
     51        private:
     52            T& builder;
     53        public:
     54            AttributeAdder(T& b) : builder(b) {}
     55            void addAttribute(const UnicodeString& k, Operand&& r, UErrorCode& s) {
     56                builder.addAttribute(k, std::move(r), s);
     57            }
     58    };
     59 
     60 
     61    // Initialization of UnicodeSets
     62    namespace unisets {
     63        enum Key {
     64            CONTENT,
     65            WHITESPACE,
     66            BIDI,
     67            ALPHA,
     68            DIGIT,
     69            NAME_START,
     70            NAME_CHAR,
     71            TEXT,
     72            QUOTED,
     73            ESCAPABLE,
     74            UNISETS_KEY_COUNT
     75        };
     76 
     77    U_I18N_API const UnicodeSet* get(Key key, UErrorCode& status);
     78    }
     79 
     80    // Parser class (private)
     81    class Parser : public UMemory {
     82    public:
     83 virtual ~Parser();
     84    private:
     85        friend class MessageFormatter;
     86 
     87        void parse(UParseError&, UErrorCode&);
     88 
     89 /*
     90   Use an internal "parse error" structure to make it easier to translate
     91   absolute offsets to line offsets.
     92   This is translated back to a `UParseError` at the end of parsing.
     93 */
     94 typedef struct MessageParseError {
     95     // The line on which the error occurred
     96     uint32_t line;
     97     // The offset, relative to the erroneous line, on which the error occurred
     98     uint32_t offset;
     99     // The total number of characters seen before advancing to the current line. It has a value of 0 if line == 0.
    100     // It includes newline characters, because the index does too.
    101     uint32_t lengthBeforeCurrentLine;
    102 
    103     // This parser doesn't yet use the last two fields.
    104     UChar   preContext[U_PARSE_CONTEXT_LEN];
    105     UChar   postContext[U_PARSE_CONTEXT_LEN];
    106 } MessageParseError;
    107 
    108 Parser(const UnicodeString &input,
    109               MFDataModel::Builder& dataModelBuilder,
    110               StaticErrors& e,
    111               UnicodeString& normalizedInputRef,
    112               UErrorCode& status)
    113            : contentChars(unisets::get(unisets::CONTENT, status)),
    114              whitespaceChars(unisets::get(unisets::WHITESPACE, status)),
    115              bidiControlChars(unisets::get(unisets::BIDI, status)),
    116              alphaChars(unisets::get(unisets::ALPHA, status)),
    117              digitChars(unisets::get(unisets::DIGIT, status)),
    118              nameStartChars(unisets::get(unisets::NAME_START, status)),
    119              nameChars(unisets::get(unisets::NAME_CHAR, status)),
    120              textChars(unisets::get(unisets::TEXT, status)),
    121              quotedChars(unisets::get(unisets::QUOTED, status)),
    122              escapableChars(unisets::get(unisets::ESCAPABLE, status)),
    123            source(input), index(0), errors(e), normalizedInput(normalizedInputRef), dataModel(dataModelBuilder) {
    124            (void) status;
    125   parseError.line = 0;
    126   parseError.offset = 0;
    127   parseError.lengthBeforeCurrentLine = 0;
    128   parseError.preContext[0] = '\0';
    129   parseError.postContext[0] = '\0';
    130 }
    131 
    132        bool isContentChar(UChar32) const;
    133        bool isBidiControl(UChar32) const;
    134        bool isWhitespace(UChar32) const;
    135        bool isTextChar(UChar32) const;
    136        bool isQuotedChar(UChar32) const;
    137        bool isEscapableChar(UChar32) const;
    138        bool isAlpha(UChar32) const;
    139        bool isDigit(UChar32) const;
    140        bool isNameStart(UChar32) const;
    141        bool isNameChar(UChar32) const;
    142        bool isUnquotedStart(UChar32) const;
    143        bool isLiteralStart(UChar32) const;
    144        bool isKeyStart(UChar32) const;
    145 
    146 static void translateParseError(const MessageParseError&, UParseError&);
    147 static void setParseError(MessageParseError&, uint32_t);
    148 void maybeAdvanceLine();
    149        Pattern parseSimpleMessage(UErrorCode&);
    150        void parseBody(UErrorCode&);
    151 void parseDeclarations(UErrorCode&);
    152        void parseUnsupportedStatement(UErrorCode&);
    153        void parseLocalDeclaration(UErrorCode&);
    154        void parseInputDeclaration(UErrorCode&);
    155        void parseSelectors(UErrorCode&);
    156        void parseVariant(UErrorCode&);
    157 
    158 void parseRequiredWS(UErrorCode&);
    159 void parseRequiredWhitespace(UErrorCode&);
    160 void parseOptionalBidi();
    161 void parseOptionalWhitespace();
    162 void parseToken(UChar32, UErrorCode&);
    163 void parseTokenWithWhitespace(UChar32, UErrorCode&);
    164 void parseToken(const std::u16string_view&, UErrorCode&);
    165 void parseTokenWithWhitespace(const std::u16string_view&, UErrorCode&);
    166        bool nextIs(const std::u16string_view&) const;
    167 UnicodeString parseNameChars(UnicodeString&, UErrorCode&);
    168 UnicodeString parseName(UErrorCode&);
    169        UnicodeString parseIdentifier(UErrorCode&);
    170        UnicodeString parseDigits(UErrorCode&);
    171 VariableName parseVariableName(UErrorCode&);
    172 FunctionName parseFunction(UErrorCode&);
    173 UnicodeString parseEscapeSequence(UErrorCode&);
    174 Literal parseUnquotedLiteral(UErrorCode&);
    175        Literal parseQuotedLiteral(UErrorCode&);
    176 Literal parseLiteral(UErrorCode&);
    177        template<class T>
    178        void parseAttribute(AttributeAdder<T>&, UErrorCode&);
    179        template<class T>
    180        void parseAttributes(AttributeAdder<T>&, UErrorCode&);
    181        template<class T>
    182        void parseOption(OptionAdder<T>&, UErrorCode&);
    183        template<class T>
    184        void parseOptions(OptionAdder<T>&, UErrorCode&);
    185 Operator parseAnnotation(UErrorCode&);
    186 void parseLiteralOrVariableWithAnnotation(bool, Expression::Builder&, UErrorCode&);
    187        Markup parseMarkup(UErrorCode&);
    188 Expression parseExpression(UErrorCode&);
    189        std::variant<Expression, Markup> parsePlaceholder(UErrorCode&);
    190 UnicodeString parseTextChar(UErrorCode&);
    191 Key parseKey(UErrorCode&);
    192 SelectorKeys parseNonEmptyKeys(UErrorCode&);
    193 void errorPattern(UErrorCode& status);
    194 Pattern parseQuotedPattern(UErrorCode&);
    195        bool isDeclarationStart();
    196 
    197        UChar32 peek() const { return source.char32At(index) ; }
    198        UChar32 peek(uint32_t i) const {
    199            return source.char32At(source.moveIndex32(index, i));
    200        }
    201        void next() { index = source.moveIndex32(index, 1); }
    202 
    203        bool inBounds() const { return (int32_t) index < source.length(); }
    204        bool inBounds(uint32_t i) const { return source.moveIndex32(index, i) < source.length(); }
    205        bool allConsumed() const { return (int32_t) index == source.length(); }
    206 
    207        // UnicodeSets for checking character ranges
    208        const UnicodeSet* contentChars;
    209        const UnicodeSet* whitespaceChars;
    210        const UnicodeSet* bidiControlChars;
    211        const UnicodeSet* alphaChars;
    212        const UnicodeSet* digitChars;
    213        const UnicodeSet* nameStartChars;
    214        const UnicodeSet* nameChars;
    215        const UnicodeSet* textChars;
    216        const UnicodeSet* quotedChars;
    217        const UnicodeSet* escapableChars;
    218 
    219 // The input string
    220 const UnicodeString &source;
    221 // The current position within the input string -- counting in UChar32
    222 uint32_t index;
    223 // Represents the current line (and when an error is indicated),
    224 // character offset within the line of the parse error
    225 MessageParseError parseError;
    226 
    227 // The structure to use for recording errors
    228 StaticErrors& errors;
    229 
    230 // Normalized version of the input string (optional whitespace removed)
    231 UnicodeString& normalizedInput;
    232 
    233 // The parent builder
    234 MFDataModel::Builder &dataModel;
    235 
    236    }; // class Parser
    237 } // namespace message2
    238 
    239 U_NAMESPACE_END
    240 
    241 #endif /* #if !UCONFIG_NO_MF2 */
    242 
    243 #endif /* #if !UCONFIG_NO_FORMATTING */
    244 
    245 #endif /* #if !UCONFIG_NO_NORMALIZATION */
    246 
    247 #endif /* U_SHOW_CPLUSPLUS_API */
    248 
    249 #endif // MESSAGEFORMAT_PARSER_H
    250 
    251 #endif // U_HIDE_DEPRECATED_API
    252 // eof