expression.py (9231B)
1 # This Source Code Form is subject to the terms of the Mozilla Public 2 # License, v. 2.0. If a copy of the MPL was not distributed with this file, 3 # You can obtain one at http://mozilla.org/MPL/2.0/. 4 5 import functools 6 import re 7 import sys 8 import traceback 9 10 __all__ = ["parse", "ParseError", "ExpressionParser"] 11 12 # expr.py 13 # from: 14 # http://k0s.org/mozilla/hg/expressionparser 15 # http://hg.mozilla.org/users/tmielczarek_mozilla.com/expressionparser 16 17 # Implements a top-down parser/evaluator for simple boolean expressions. 18 # ideas taken from http://effbot.org/zone/simple-top-down-parsing.htm 19 # 20 # Rough grammar: 21 # expr := literal 22 # | '(' expr ')' 23 # | expr '&&' expr 24 # | expr '||' expr 25 # | expr '==' expr 26 # | expr '!=' expr 27 # | expr '<' expr 28 # | expr '>' expr 29 # | expr '<=' expr 30 # | expr '>=' expr 31 # literal := BOOL 32 # | INT 33 # | STRING 34 # | IDENT 35 # BOOL := true|false 36 # INT := [0-9]+ 37 # STRING := "[^"]*" 38 # IDENT := [A-Za-z_]\w* 39 40 # Identifiers take their values from a mapping dictionary passed as the second 41 # argument. 42 43 # Glossary (see above URL for details): 44 # - nud: null denotation 45 # - led: left detonation 46 # - lbp: left binding power 47 # - rbp: right binding power 48 49 50 class ident_token: 51 def __init__(self, scanner, value): 52 self.value = value 53 54 def nud(self, parser): 55 # identifiers take their value from the value mappings passed 56 # to the parser 57 return parser.value(self.value) 58 59 60 class literal_token: 61 def __init__(self, scanner, value): 62 self.value = value 63 64 def nud(self, parser): 65 return self.value 66 67 68 class eq_op_token: 69 "==" 70 71 def led(self, parser, left): 72 return left == parser.expression(self.lbp) 73 74 75 class neq_op_token: 76 "!=" 77 78 def led(self, parser, left): 79 return left != parser.expression(self.lbp) 80 81 82 class lt_op_token: 83 "<" 84 85 def led(self, parser, left): 86 return left < parser.expression(self.lbp) 87 88 89 class gt_op_token: 90 ">" 91 92 def led(self, parser, left): 93 return left > parser.expression(self.lbp) 94 95 96 class le_op_token: 97 "<=" 98 99 def led(self, parser, left): 100 return left <= parser.expression(self.lbp) 101 102 103 class ge_op_token: 104 ">=" 105 106 def led(self, parser, left): 107 return left >= parser.expression(self.lbp) 108 109 110 class not_op_token: 111 "!" 112 113 def nud(self, parser): 114 return not parser.expression(100) 115 116 117 class and_op_token: 118 "&&" 119 120 def led(self, parser, left): 121 right = parser.expression(self.lbp) 122 return left and right 123 124 125 class or_op_token: 126 "||" 127 128 def led(self, parser, left): 129 right = parser.expression(self.lbp) 130 return left or right 131 132 133 class lparen_token: 134 "(" 135 136 def nud(self, parser): 137 expr = parser.expression() 138 parser.advance(rparen_token) 139 return expr 140 141 142 class rparen_token: 143 ")" 144 145 146 class end_token: 147 """always ends parsing""" 148 149 150 # derived literal tokens 151 152 153 class bool_token(literal_token): 154 def __init__(self, scanner, value): 155 value = {"true": True, "false": False}[value] 156 literal_token.__init__(self, scanner, value) 157 158 159 class int_token(literal_token): 160 def __init__(self, scanner, value): 161 literal_token.__init__(self, scanner, int(value)) 162 163 164 class string_token(literal_token): 165 def __init__(self, scanner, value): 166 literal_token.__init__(self, scanner, value[1:-1]) 167 168 169 precedence = [ 170 (end_token, rparen_token), 171 (or_op_token,), 172 (and_op_token,), 173 (lt_op_token, gt_op_token, le_op_token, ge_op_token, eq_op_token, neq_op_token), 174 (lparen_token,), 175 ] 176 for index, rank in enumerate(precedence): 177 for token in rank: 178 token.lbp = index # lbp = lowest left binding power 179 180 181 class ParseError(Exception): 182 """error parsing conditional expression""" 183 184 185 # ignore warnings about using `cache` instead; this code 186 # runs on python versions that don't support that 187 @functools.lru_cache(maxsize=None) # noqa: UP033 188 def scan(text): 189 if not ExpressionParser.scanner: 190 ExpressionParser.scanner = re.Scanner([ 191 # Note: keep these in sync with the class docstring above. 192 (r"true|false", bool_token), 193 (r"[a-zA-Z_]\w*", ident_token), 194 (r"[0-9]+", int_token), 195 (r'("[^"]*")|(\'[^\']*\')', string_token), 196 (r"==", eq_op_token()), 197 (r"!=", neq_op_token()), 198 (r"<=", le_op_token()), 199 (r">=", ge_op_token()), 200 (r"<", lt_op_token()), 201 (r">", gt_op_token()), 202 (r"\|\|", or_op_token()), 203 (r"!", not_op_token()), 204 (r"&&", and_op_token()), 205 (r"\(", lparen_token()), 206 (r"\)", rparen_token()), 207 (r"\s+", None), # skip whitespace 208 ]) 209 tokens, _ = ExpressionParser.scanner.scan(text) 210 return tokens 211 212 213 class ExpressionParser: 214 r""" 215 A parser for a simple expression language. 216 217 The expression language can be described as follows:: 218 219 EXPRESSION ::= LITERAL | '(' EXPRESSION ')' | '!' EXPRESSION | EXPRESSION OP EXPRESSION 220 OP ::= '==' | '!=' | '<' | '>' | '<=' | '>=' | '&&' | '||' 221 LITERAL ::= BOOL | INT | IDENT | STRING 222 BOOL ::= 'true' | 'false' 223 INT ::= [0-9]+ 224 IDENT ::= [a-zA-Z_]\w* 225 STRING ::= '"' [^\"] '"' | ''' [^\'] ''' 226 227 At its core, expressions consist of booleans, integers, identifiers and. 228 strings. Booleans are one of *true* or *false*. Integers are a series 229 of digits. Identifiers are a series of English letters and underscores. 230 Strings are a pair of matching quote characters (single or double) with 231 zero or more characters inside. 232 233 Expressions can be combined with operators: the equals (==) and not 234 equals (!=) operators compare two expressions and produce a boolean. The 235 and (&&) and or (||) operators take two expressions and produce the logical 236 AND or OR value of them, respectively. An expression can also be prefixed 237 with the not (!) operator, which produces its logical negation. 238 239 Finally, any expression may be contained within parentheses for grouping. 240 241 Identifiers take their values from the mapping provided. 242 """ 243 244 scanner = None 245 246 def __init__(self, text, valuemapping, strict=False): 247 """ 248 Initialize the parser 249 :param text: The expression to parse as a string. 250 :param valuemapping: A dict mapping identifier names to values. 251 :param strict: If true, referencing an identifier that was not 252 provided in :valuemapping: will raise an error. 253 """ 254 self.text = text 255 self.valuemapping = valuemapping 256 self.strict = strict 257 258 def _tokenize(self): 259 """ 260 Lex the input text into tokens and yield them in sequence. 261 """ 262 yield from scan(self.text) 263 yield end_token() 264 265 def value(self, ident): 266 """ 267 Look up the value of |ident| in the value mapping passed in the 268 constructor. 269 """ 270 if self.strict: 271 return self.valuemapping[ident] 272 else: 273 return self.valuemapping.get(ident, "") 274 275 def advance(self, expected): 276 """ 277 Assert that the next token is an instance of |expected|, and advance 278 to the next token. 279 """ 280 if not isinstance(self.token, expected): 281 raise Exception("Unexpected token!") 282 self.token = next(self.iter) 283 284 def expression(self, rbp=0): 285 """ 286 Parse and return the value of an expression until a token with 287 right binding power greater than rbp is encountered. 288 """ 289 t = self.token 290 self.token = next(self.iter) 291 left = t.nud(self) 292 while rbp < self.token.lbp: 293 t = self.token 294 self.token = next(self.iter) 295 left = t.led(self, left) 296 return left 297 298 def parse(self): 299 """ 300 Parse and return the value of the expression in the text 301 passed to the constructor. Raises a ParseError if the expression 302 could not be parsed. 303 """ 304 try: 305 self.iter = self._tokenize() 306 self.token = next(self.iter) 307 return self.expression() 308 except Exception: 309 extype, ex, tb = sys.exc_info() 310 formatted = "".join(traceback.format_exception_only(extype, ex)) 311 pe = ParseError( 312 "could not parse: %s\nexception: %svariables: %s" 313 % (self.text, formatted, self.valuemapping) 314 ) 315 raise pe.with_traceback(tb) 316 317 __call__ = parse 318 319 320 # ignore warnings about using `cache` instead; this code 321 # runs on python versions that don't support that 322 @functools.lru_cache(maxsize=None) # noqa: UP033 323 def parse(text, strict=False, **values): 324 """ 325 Parse and evaluate a boolean expression. 326 :param text: The expression to parse, as a string. 327 :param values: A dict containing a name to value mapping for identifiers 328 referenced in *text*. 329 :rtype: the final value of the expression. 330 :raises: :py:exc::ParseError: will be raised if parsing fails. 331 """ 332 return ExpressionParser(text, values, strict=strict).parse()