tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

expression.py (9231B)


      1 # This Source Code Form is subject to the terms of the Mozilla Public
      2 # License, v. 2.0. If a copy of the MPL was not distributed with this file,
      3 # You can obtain one at http://mozilla.org/MPL/2.0/.
      4 
      5 import functools
      6 import re
      7 import sys
      8 import traceback
      9 
     10 __all__ = ["parse", "ParseError", "ExpressionParser"]
     11 
     12 # expr.py
     13 # from:
     14 # http://k0s.org/mozilla/hg/expressionparser
     15 # http://hg.mozilla.org/users/tmielczarek_mozilla.com/expressionparser
     16 
     17 # Implements a top-down parser/evaluator for simple boolean expressions.
     18 # ideas taken from http://effbot.org/zone/simple-top-down-parsing.htm
     19 #
     20 # Rough grammar:
     21 # expr := literal
     22 #       | '(' expr ')'
     23 #       | expr '&&' expr
     24 #       | expr '||' expr
     25 #       | expr '==' expr
     26 #       | expr '!=' expr
     27 #       | expr '<' expr
     28 #       | expr '>' expr
     29 #       | expr '<=' expr
     30 #       | expr '>=' expr
     31 # literal := BOOL
     32 #          | INT
     33 #          | STRING
     34 #          | IDENT
     35 # BOOL   := true|false
     36 # INT    := [0-9]+
     37 # STRING := "[^"]*"
     38 # IDENT  := [A-Za-z_]\w*
     39 
     40 # Identifiers take their values from a mapping dictionary passed as the second
     41 # argument.
     42 
     43 # Glossary (see above URL for details):
     44 # - nud: null denotation
     45 # - led: left detonation
     46 # - lbp: left binding power
     47 # - rbp: right binding power
     48 
     49 
     50 class ident_token:
     51    def __init__(self, scanner, value):
     52        self.value = value
     53 
     54    def nud(self, parser):
     55        # identifiers take their value from the value mappings passed
     56        # to the parser
     57        return parser.value(self.value)
     58 
     59 
     60 class literal_token:
     61    def __init__(self, scanner, value):
     62        self.value = value
     63 
     64    def nud(self, parser):
     65        return self.value
     66 
     67 
     68 class eq_op_token:
     69    "=="
     70 
     71    def led(self, parser, left):
     72        return left == parser.expression(self.lbp)
     73 
     74 
     75 class neq_op_token:
     76    "!="
     77 
     78    def led(self, parser, left):
     79        return left != parser.expression(self.lbp)
     80 
     81 
     82 class lt_op_token:
     83    "<"
     84 
     85    def led(self, parser, left):
     86        return left < parser.expression(self.lbp)
     87 
     88 
     89 class gt_op_token:
     90    ">"
     91 
     92    def led(self, parser, left):
     93        return left > parser.expression(self.lbp)
     94 
     95 
     96 class le_op_token:
     97    "<="
     98 
     99    def led(self, parser, left):
    100        return left <= parser.expression(self.lbp)
    101 
    102 
    103 class ge_op_token:
    104    ">="
    105 
    106    def led(self, parser, left):
    107        return left >= parser.expression(self.lbp)
    108 
    109 
    110 class not_op_token:
    111    "!"
    112 
    113    def nud(self, parser):
    114        return not parser.expression(100)
    115 
    116 
    117 class and_op_token:
    118    "&&"
    119 
    120    def led(self, parser, left):
    121        right = parser.expression(self.lbp)
    122        return left and right
    123 
    124 
    125 class or_op_token:
    126    "||"
    127 
    128    def led(self, parser, left):
    129        right = parser.expression(self.lbp)
    130        return left or right
    131 
    132 
    133 class lparen_token:
    134    "("
    135 
    136    def nud(self, parser):
    137        expr = parser.expression()
    138        parser.advance(rparen_token)
    139        return expr
    140 
    141 
    142 class rparen_token:
    143    ")"
    144 
    145 
    146 class end_token:
    147    """always ends parsing"""
    148 
    149 
    150 # derived literal tokens
    151 
    152 
    153 class bool_token(literal_token):
    154    def __init__(self, scanner, value):
    155        value = {"true": True, "false": False}[value]
    156        literal_token.__init__(self, scanner, value)
    157 
    158 
    159 class int_token(literal_token):
    160    def __init__(self, scanner, value):
    161        literal_token.__init__(self, scanner, int(value))
    162 
    163 
    164 class string_token(literal_token):
    165    def __init__(self, scanner, value):
    166        literal_token.__init__(self, scanner, value[1:-1])
    167 
    168 
    169 precedence = [
    170    (end_token, rparen_token),
    171    (or_op_token,),
    172    (and_op_token,),
    173    (lt_op_token, gt_op_token, le_op_token, ge_op_token, eq_op_token, neq_op_token),
    174    (lparen_token,),
    175 ]
    176 for index, rank in enumerate(precedence):
    177    for token in rank:
    178        token.lbp = index  # lbp = lowest left binding power
    179 
    180 
    181 class ParseError(Exception):
    182    """error parsing conditional expression"""
    183 
    184 
    185 # ignore warnings about using `cache` instead; this code
    186 # runs on python versions that don't support that
    187 @functools.lru_cache(maxsize=None)  # noqa: UP033
    188 def scan(text):
    189    if not ExpressionParser.scanner:
    190        ExpressionParser.scanner = re.Scanner([
    191            # Note: keep these in sync with the class docstring above.
    192            (r"true|false", bool_token),
    193            (r"[a-zA-Z_]\w*", ident_token),
    194            (r"[0-9]+", int_token),
    195            (r'("[^"]*")|(\'[^\']*\')', string_token),
    196            (r"==", eq_op_token()),
    197            (r"!=", neq_op_token()),
    198            (r"<=", le_op_token()),
    199            (r">=", ge_op_token()),
    200            (r"<", lt_op_token()),
    201            (r">", gt_op_token()),
    202            (r"\|\|", or_op_token()),
    203            (r"!", not_op_token()),
    204            (r"&&", and_op_token()),
    205            (r"\(", lparen_token()),
    206            (r"\)", rparen_token()),
    207            (r"\s+", None),  # skip whitespace
    208        ])
    209    tokens, _ = ExpressionParser.scanner.scan(text)
    210    return tokens
    211 
    212 
    213 class ExpressionParser:
    214    r"""
    215    A parser for a simple expression language.
    216 
    217    The expression language can be described as follows::
    218 
    219        EXPRESSION ::= LITERAL | '(' EXPRESSION ')' | '!' EXPRESSION | EXPRESSION OP EXPRESSION
    220        OP ::= '==' | '!=' | '<' | '>' | '<=' | '>=' | '&&' | '||'
    221        LITERAL ::= BOOL | INT | IDENT | STRING
    222        BOOL ::= 'true' | 'false'
    223        INT ::= [0-9]+
    224        IDENT ::= [a-zA-Z_]\w*
    225        STRING ::= '"' [^\"] '"' | ''' [^\'] '''
    226 
    227    At its core, expressions consist of booleans, integers, identifiers and.
    228    strings. Booleans are one of *true* or *false*. Integers are a series
    229    of digits. Identifiers are a series of English letters and underscores.
    230    Strings are a pair of matching quote characters (single or double) with
    231    zero or more characters inside.
    232 
    233    Expressions can be combined with operators: the equals (==) and not
    234    equals (!=) operators compare two expressions and produce a boolean. The
    235    and (&&) and or (||) operators take two expressions and produce the logical
    236    AND or OR value of them, respectively. An expression can also be prefixed
    237    with the not (!) operator, which produces its logical negation.
    238 
    239    Finally, any expression may be contained within parentheses for grouping.
    240 
    241    Identifiers take their values from the mapping provided.
    242    """
    243 
    244    scanner = None
    245 
    246    def __init__(self, text, valuemapping, strict=False):
    247        """
    248        Initialize the parser
    249        :param text: The expression to parse as a string.
    250        :param valuemapping: A dict mapping identifier names to values.
    251        :param strict: If true, referencing an identifier that was not
    252                       provided in :valuemapping: will raise an error.
    253        """
    254        self.text = text
    255        self.valuemapping = valuemapping
    256        self.strict = strict
    257 
    258    def _tokenize(self):
    259        """
    260        Lex the input text into tokens and yield them in sequence.
    261        """
    262        yield from scan(self.text)
    263        yield end_token()
    264 
    265    def value(self, ident):
    266        """
    267        Look up the value of |ident| in the value mapping passed in the
    268        constructor.
    269        """
    270        if self.strict:
    271            return self.valuemapping[ident]
    272        else:
    273            return self.valuemapping.get(ident, "")
    274 
    275    def advance(self, expected):
    276        """
    277        Assert that the next token is an instance of |expected|, and advance
    278        to the next token.
    279        """
    280        if not isinstance(self.token, expected):
    281            raise Exception("Unexpected token!")
    282        self.token = next(self.iter)
    283 
    284    def expression(self, rbp=0):
    285        """
    286        Parse and return the value of an expression until a token with
    287        right binding power greater than rbp is encountered.
    288        """
    289        t = self.token
    290        self.token = next(self.iter)
    291        left = t.nud(self)
    292        while rbp < self.token.lbp:
    293            t = self.token
    294            self.token = next(self.iter)
    295            left = t.led(self, left)
    296        return left
    297 
    298    def parse(self):
    299        """
    300        Parse and return the value of the expression in the text
    301        passed to the constructor. Raises a ParseError if the expression
    302        could not be parsed.
    303        """
    304        try:
    305            self.iter = self._tokenize()
    306            self.token = next(self.iter)
    307            return self.expression()
    308        except Exception:
    309            extype, ex, tb = sys.exc_info()
    310            formatted = "".join(traceback.format_exception_only(extype, ex))
    311            pe = ParseError(
    312                "could not parse: %s\nexception: %svariables: %s"
    313                % (self.text, formatted, self.valuemapping)
    314            )
    315            raise pe.with_traceback(tb)
    316 
    317    __call__ = parse
    318 
    319 
    320 # ignore warnings about using `cache` instead; this code
    321 # runs on python versions that don't support that
    322 @functools.lru_cache(maxsize=None)  # noqa: UP033
    323 def parse(text, strict=False, **values):
    324    """
    325    Parse and evaluate a boolean expression.
    326    :param text: The expression to parse, as a string.
    327    :param values: A dict containing a name to value mapping for identifiers
    328                   referenced in *text*.
    329    :rtype: the final value of the expression.
    330    :raises: :py:exc::ParseError: will be raised if parsing fails.
    331    """
    332    return ExpressionParser(text, values, strict=strict).parse()