expressions.c (111238B)
/// Vimscript expression parser

// Planned incompatibilities (to be included into vim_diff.txt when this parser
// becomes an actual part of the Vimscript evaluation process):
//
// 1. Expressions are first fully parsed and only then executed. This means
//    that while ":echo [system('touch abc')" will create file "abc" in Vim and
//    only then raise a syntax error regarding the missing comma in the list,
//    in Neovim trying to execute that will immediately raise a syntax error
//    regarding the missing list end without actually executing anything.
// 2. Expressions are first fully parsed, without considering any runtime
//    information. This means that things like "d.a" do not change their
//    meaning depending on the type of "d" (or on whether Vim is currently
//    executing or skipping). For compatibility reasons the dot thus may either
//    be a “concat or subscript” operator or just a “concat” operator.
// 3. The expressions parser is aware whether it is called for :echo or <C-r>=.
//    This means that while in Vim "<C-r>=1 | 2<CR>" is equivalent to
//    "<C-r>=1<CR>" because the "| 2" part is left to be treated as a command
//    separator and then ignored, in Neovim it is an error.
// 4. The expressions parser has generally better error reporting. But for
//    compatibility reasons most errors have error code E15 while error messages
//    are significantly different from Vim’s E15. Also some error codes were
//    retired because of being harder to emulate or because of them being
//    a result of differences in the parsing process: e.g. with ":echo {a, b}"
//    Vim will attempt to parse the expression as a lambda, fail, check whether
//    it is a curly-braces-name, fail again, and evaluate that as a dictionary,
//    giving an error regarding undefined variable "a" (or about a missing
//    colon). Neovim will not try to evaluate anything here: a comma right after
//    an argument name means that the expression may not be anything but
//    a lambda, so the resulting error message will never be about a missing
//    variable or colon: it will be about a missing arrow (or a continuation of
//    the argument list).
// 5. Failing to parse an expression always gives exactly one error message: no
//    more stacks of error messages like >
//
//        :echo [1,
//        E697: Missing end of List ']':
//        E15: Invalid expression: [1,
//
//    < , just exactly one E697 message.
// 6. Some expressions involving calling parenthesis which are treated
//    separately by Vim even when not separated by spaces are treated as one
//    expression by Neovim: e.g. ":echo (1)(1)" will yield a runtime error after
//    failing to call "1", while Vim will echo "1 1". The reasoning is the same:
//    the type of what is in the first expression is generally not known when
//    parsing, so to have separate expressions like this separate them with
//    spaces.
// 7. 'isident' no longer applies to environment variables, they always include
//    ASCII alphanumeric characters and underscore and nothing except this.

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "klib/kvec.h"
#include "nvim/ascii_defs.h"
#include "nvim/assert_defs.h"
#include "nvim/charset.h"
#include "nvim/eval.h"
#include "nvim/gettext_defs.h"
#include "nvim/keycodes.h"
#include "nvim/macros_defs.h"
#include "nvim/mbyte.h"
#include "nvim/memory.h"
#include "nvim/types_defs.h"
#include "nvim/viml/parser/expressions.h"
#include "nvim/viml/parser/parser.h"

/// Stack of pointers to AST node slots (`ExprASTNode **`)
///
/// Used for AST traversal, e.g. by viml_pexpr_free_ast().
typedef kvec_withinit_t(ExprASTNode **, 16) ExprASTStack;

/// Which nodes may be wanted
typedef enum {
  /// Operators: function call, subscripts, binary operators, …
  ///
  /// For unrestricted expressions.
  kENodeOperator,
  /// Values: literals, variables, nested expressions, unary operators.
  ///
  /// For unrestricted expressions as well, implies that top item in AST stack
  /// points to NULL.
  kENodeValue,
} ExprASTWantedNode;

/// Parse type: what is being parsed currently
typedef enum {
  /// Parsing regular Vimscript expression
  kEPTExpr = 0,
  /// Parsing lambda arguments
  ///
  /// Just like parsing function arguments, but it is valid to be ended with an
  /// arrow only.
  kEPTLambdaArguments,
  /// Assignment: parsing for :let
  kEPTAssignment,
  /// Single assignment: used when lists are not allowed (i.e. when nesting)
  kEPTSingleAssignment,
} ExprASTParseType;

/// Stack of parse types, one entry per ExprASTParseType value pushed
typedef kvec_withinit_t(ExprASTParseType, 4) ExprASTParseTypeStack;

/// Operator priority level
///
/// Enumerators are listed in ascending numeric order.
// NOTE(review): presumably a greater value means tighter binding (Or < And <
// Comparison < Addition < Multiplication matches the precedence table later in
// this file) — confirm against the parser code that compares levels.
typedef enum {
  kEOpLvlInvalid = 0,
  kEOpLvlComplexIdentifier,
  kEOpLvlParens,
  kEOpLvlAssignment,
  kEOpLvlArrow,
  kEOpLvlComma,
  kEOpLvlColon,
  kEOpLvlTernaryValue,
  kEOpLvlTernary,
  kEOpLvlOr,
  kEOpLvlAnd,
  kEOpLvlComparison,
  kEOpLvlAddition,  ///< Addition, subtraction and concatenation.
  kEOpLvlMultiplication,  ///< Multiplication, division and modulo.
  kEOpLvlUnary,  ///< Unary operations: not, minus, plus.
  kEOpLvlSubscript,  ///< Subscripts.
  kEOpLvlValue,  ///< Values: literals, variables, nested expressions, …
} ExprOpLvl;

/// Operator associativity
///
/// Values are printable ASCII characters ('n', 'l', 'r').
typedef enum {
  kEOpAssNo= 'n',  ///< Not associative / not applicable.
  kEOpAssLeft = 'l',  ///< Left associativity.
  kEOpAssRight = 'r',  ///< Right associativity.
} ExprOpAssociativity;

#include "viml/parser/expressions.c.generated.h"

/// Scale number by a given factor
///
/// Used to apply exponent to a number. Idea taken from uClibc.
///
/// @param[in]  num  Number to scale. Does not bother doing anything if it is
///                  zero.
139 /// @param[in] base Base, should be 10 since non-decimal floating-point 140 /// numbers are not supported. 141 /// @param[in] exponent Exponent to scale by. 142 /// @param[in] exponent_negative True if exponent is negative. 143 static inline float_T scale_number(const float_T num, const uint8_t base, 144 const uvarnumber_T exponent, const bool exponent_negative) 145 FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_CONST 146 { 147 if (num == 0 || exponent == 0) { 148 return num; 149 } 150 assert(base); 151 uvarnumber_T exp = exponent; 152 float_T p_base = (float_T)base; 153 float_T ret = num; 154 while (exp) { 155 if (exp & 1) { 156 if (exponent_negative) { 157 ret /= p_base; 158 } else { 159 ret *= p_base; 160 } 161 } 162 exp >>= 1; 163 p_base *= p_base; 164 } 165 return ret; 166 } 167 168 /// Get next token for the Vimscript expression input 169 /// 170 /// @param pstate Parser state. 171 /// @param[in] flags Flags, @see LexExprFlags. 172 /// 173 /// @return Next token. 174 LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) 175 FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL 176 { 177 LexExprToken ret = { 178 .type = kExprLexInvalid, 179 .start = pstate->pos, 180 }; 181 ParserLine pline; 182 if (!viml_parser_get_remaining_line(pstate, &pline)) { 183 ret.type = kExprLexEOC; 184 return ret; 185 } 186 if (pline.size <= 0) { 187 ret.len = 0; 188 ret.type = kExprLexEOC; 189 goto viml_pexpr_next_token_adv_return; 190 } 191 ret.len = 1; 192 const uint8_t schar = (uint8_t)pline.data[0]; 193 #define GET_CCS(ret, pline) \ 194 do { \ 195 if (ret.len < pline.size \ 196 && strchr("?#", pline.data[ret.len]) != NULL) { \ 197 ret.data.cmp.ccs = \ 198 (ExprCaseCompareStrategy)pline.data[ret.len]; \ 199 ret.len++; \ 200 } else { \ 201 ret.data.cmp.ccs = kCCStrategyUseOption; \ 202 } \ 203 } while (0) 204 switch (schar) { 205 // Paired brackets. 
206 #define BRACKET(typ, opning, clsing) \ 207 case opning: \ 208 case clsing: { \ 209 ret.type = typ; \ 210 ret.data.brc.closing = (schar == clsing); \ 211 break; \ 212 } 213 BRACKET(kExprLexParenthesis, '(', ')') 214 BRACKET(kExprLexBracket, '[', ']') 215 BRACKET(kExprLexFigureBrace, '{', '}') 216 #undef BRACKET 217 218 // Single character tokens without data. 219 #define CHAR(typ, ch) \ 220 case ch: { \ 221 ret.type = typ; \ 222 break; \ 223 } 224 CHAR(kExprLexQuestion, '?') 225 CHAR(kExprLexColon, ':') 226 CHAR(kExprLexComma, ',') 227 #undef CHAR 228 229 // Multiplication/division/modulo. 230 #define MUL(mul_type, ch) \ 231 case ch: { \ 232 ret.type = kExprLexMultiplication; \ 233 ret.data.mul.type = mul_type; \ 234 break; \ 235 } 236 MUL(kExprLexMulMul, '*') 237 MUL(kExprLexMulDiv, '/') 238 MUL(kExprLexMulMod, '%') 239 #undef MUL 240 241 #define CHARREG(typ, cond) \ 242 do { \ 243 ret.type = typ; \ 244 for (; (ret.len < pline.size \ 245 && cond(pline.data[ret.len])) \ 246 ; ret.len++) { \ 247 } \ 248 } while (0) 249 250 // Whitespace. 251 case ' ': 252 case TAB: 253 CHARREG(kExprLexSpacing, ascii_iswhite); 254 break; 255 256 // Control character, except for NUL, NL and TAB. 257 case Ctrl_A: 258 case Ctrl_B: 259 case Ctrl_C: 260 case Ctrl_D: 261 case Ctrl_E: 262 case Ctrl_F: 263 case Ctrl_G: 264 case Ctrl_H: 265 266 case Ctrl_K: 267 case Ctrl_L: 268 case Ctrl_M: 269 case Ctrl_N: 270 case Ctrl_O: 271 case Ctrl_P: 272 case Ctrl_Q: 273 case Ctrl_R: 274 case Ctrl_S: 275 case Ctrl_T: 276 case Ctrl_U: 277 case Ctrl_V: 278 case Ctrl_W: 279 case Ctrl_X: 280 case Ctrl_Y: 281 case Ctrl_Z: 282 #define ISCTRL(schar) (schar < ' ') 283 CHARREG(kExprLexInvalid, ISCTRL); 284 ret.data.err.type = kExprLexSpacing; 285 ret.data.err.msg = 286 _("E15: Invalid control character present in input: %.*s"); 287 break; 288 #undef ISCTRL 289 290 // Number. 
291 case '0': 292 case '1': 293 case '2': 294 case '3': 295 case '4': 296 case '5': 297 case '6': 298 case '7': 299 case '8': 300 case '9': { 301 ret.data.num.is_float = false; 302 ret.data.num.base = 10; 303 size_t frac_start = 0; 304 size_t exp_start = 0; 305 size_t frac_end = 0; 306 bool exp_negative = false; 307 CHARREG(kExprLexNumber, ascii_isdigit); 308 if (flags & kELFlagAllowFloat) { 309 const LexExprToken non_float_ret = ret; 310 if (pline.size > ret.len + 1 311 && pline.data[ret.len] == '.' 312 && ascii_isdigit(pline.data[ret.len + 1])) { 313 ret.len++; 314 frac_start = ret.len; 315 frac_end = ret.len; 316 ret.data.num.is_float = true; 317 for (; ret.len < pline.size && ascii_isdigit(pline.data[ret.len]) 318 ; ret.len++) { 319 // A small optimization: trailing zeroes in fractional part do not 320 // add anything to significand, so it is useless to include them in 321 // frac_end. 322 if (pline.data[ret.len] != '0') { 323 frac_end = ret.len + 1; 324 } 325 } 326 if (pline.size > ret.len + 1 327 && (pline.data[ret.len] == 'e' 328 || pline.data[ret.len] == 'E') 329 && ((pline.size > ret.len + 2 330 && (pline.data[ret.len + 1] == '+' 331 || pline.data[ret.len + 1] == '-') 332 && ascii_isdigit(pline.data[ret.len + 2])) 333 || ascii_isdigit(pline.data[ret.len + 1]))) { 334 ret.len++; 335 if (pline.data[ret.len] == '+' 336 || (exp_negative = (pline.data[ret.len] == '-'))) { 337 ret.len++; 338 } 339 exp_start = ret.len; 340 CHARREG(kExprLexNumber, ascii_isdigit); 341 } 342 } 343 if (pline.size > ret.len 344 && (pline.data[ret.len] == '.' 345 || ASCII_ISALPHA(pline.data[ret.len]))) { 346 ret = non_float_ret; 347 } 348 } 349 // TODO(ZyX-I): detect overflows 350 if (ret.data.num.is_float) { 351 // Vim used to use string2float here which in turn uses strtod(). There 352 // are two problems with this approach: 353 // 1. strtod() is locale-dependent. 
Not sure how it is worked around so 354 // that I do not see relevant bugs, but it still does not look like 355 // a good idea. 356 // 2. strtod() does not accept length argument. 357 // 358 // The below variant of parsing floats was recognized as acceptable 359 // because it is basically how uClibc does the thing: it generates 360 // a number ignoring decimal point (but recording its position), then 361 // uses recorded position to scale number down when processing exponent. 362 float_T significand_part = 0; 363 uvarnumber_T exp_part = 0; 364 const size_t frac_size = frac_end - frac_start; 365 for (size_t i = 0; i < frac_end; i++) { 366 if (i == frac_start - 1) { 367 continue; 368 } 369 significand_part = significand_part * 10 + (pline.data[i] - '0'); 370 } 371 if (exp_start) { 372 vim_str2nr(pline.data + exp_start, NULL, NULL, 0, NULL, &exp_part, 373 (int)(ret.len - exp_start), false, NULL); 374 } 375 if (exp_negative) { 376 exp_part += frac_size; 377 } else { 378 if (exp_part < frac_size) { 379 exp_negative = true; 380 exp_part = frac_size - exp_part; 381 } else { 382 exp_part -= frac_size; 383 } 384 } 385 ret.data.num.val.floating = scale_number(significand_part, 10, exp_part, 386 exp_negative); 387 } else { 388 int len; 389 int prep; 390 vim_str2nr(pline.data, &prep, &len, STR2NR_ALL, NULL, 391 &ret.data.num.val.integer, (int)pline.size, false, NULL); 392 ret.len = (size_t)len; 393 const uint8_t bases[] = { 394 [0] = 10, 395 ['0'] = 8, 396 ['x'] = 16, ['X'] = 16, 397 ['b'] = 2, ['B'] = 2, 398 }; 399 ret.data.num.base = bases[prep]; 400 } 401 break; 402 } 403 404 #define ISWORD_OR_AUTOLOAD(x) \ 405 (ascii_isident(x) || (x) == AUTOLOAD_CHAR) 406 407 // Environment variable. 408 case '$': 409 CHARREG(kExprLexEnv, ascii_isident); 410 break; 411 412 // Normal variable/function name. 
413 case 'a': 414 case 'b': 415 case 'c': 416 case 'd': 417 case 'e': 418 case 'f': 419 case 'g': 420 case 'h': 421 case 'i': 422 case 'j': 423 case 'k': 424 case 'l': 425 case 'm': 426 case 'n': 427 case 'o': 428 case 'p': 429 case 'q': 430 case 'r': 431 case 's': 432 case 't': 433 case 'u': 434 case 'v': 435 case 'w': 436 case 'x': 437 case 'y': 438 case 'z': 439 case 'A': 440 case 'B': 441 case 'C': 442 case 'D': 443 case 'E': 444 case 'F': 445 case 'G': 446 case 'H': 447 case 'I': 448 case 'J': 449 case 'K': 450 case 'L': 451 case 'M': 452 case 'N': 453 case 'O': 454 case 'P': 455 case 'Q': 456 case 'R': 457 case 'S': 458 case 'T': 459 case 'U': 460 case 'V': 461 case 'W': 462 case 'X': 463 case 'Y': 464 case 'Z': 465 case '_': 466 ret.data.var.scope = 0; 467 ret.data.var.autoload = false; 468 CHARREG(kExprLexPlainIdentifier, ascii_isident); 469 // "is" and "isnot" operators. 470 if (!(flags & kELFlagIsNotCmp) 471 && ((ret.len == 2 && memcmp(pline.data, "is", 2) == 0) 472 || (ret.len == 5 && memcmp(pline.data, "isnot", 5) == 0))) { 473 ret.type = kExprLexComparison; 474 ret.data.cmp.type = kExprCmpIdentical; 475 ret.data.cmp.inv = (ret.len == 5); 476 GET_CCS(ret, pline); 477 // Scope: `s:`, etc. 478 } else if (ret.len == 1 479 && pline.size > 1 480 && memchr(EXPR_VAR_SCOPE_LIST, schar, 481 sizeof(EXPR_VAR_SCOPE_LIST)) != NULL 482 && pline.data[ret.len] == ':' 483 && !(flags & kELFlagForbidScope)) { 484 ret.len++; 485 ret.data.var.scope = (ExprVarScope)schar; 486 CHARREG(kExprLexPlainIdentifier, ISWORD_OR_AUTOLOAD); 487 ret.data.var.autoload = ( 488 memchr(pline.data + 2, AUTOLOAD_CHAR, ret.len - 2) 489 != NULL); 490 // Previous CHARREG stopped at autoload character in order to make it 491 // possible to detect `is#`. Continue now with autoload characters 492 // included. 493 // 494 // Warning: there is ambiguity for the lexer: `is#Foo(1)` is a call of 495 // function `is#Foo()`, `1is#Foo(1)` is a comparison `1 is# Foo(1)`. 
This 496 // needs to be resolved on the higher level where context is available. 497 } else if (pline.size > ret.len 498 && pline.data[ret.len] == AUTOLOAD_CHAR) { 499 ret.data.var.autoload = true; 500 CHARREG(kExprLexPlainIdentifier, ISWORD_OR_AUTOLOAD); 501 } 502 break; 503 504 #undef ISWORD_OR_AUTOLOAD 505 #undef CHARREG 506 507 // Option. 508 case '&': { 509 #define OPTNAMEMISS(ret) \ 510 do { \ 511 ret.type = kExprLexInvalid; \ 512 ret.data.err.type = kExprLexOption; \ 513 ret.data.err.msg = _("E112: Option name missing: %.*s"); \ 514 } while (0) 515 if (pline.size > 1 && pline.data[1] == '&') { 516 ret.type = kExprLexAnd; 517 ret.len++; 518 break; 519 } 520 if (pline.size == 1 || !ASCII_ISALPHA(pline.data[1])) { 521 OPTNAMEMISS(ret); 522 break; 523 } 524 ret.type = kExprLexOption; 525 if (pline.size > 2 526 && pline.data[2] == ':' 527 && memchr(EXPR_OPT_SCOPE_LIST, pline.data[1], 528 sizeof(EXPR_OPT_SCOPE_LIST)) != NULL) { 529 ret.len += 2; 530 ret.data.opt.scope = (ExprOptScope)pline.data[1]; 531 ret.data.opt.name = pline.data + 3; 532 } else { 533 ret.data.opt.scope = kExprOptScopeUnspecified; 534 ret.data.opt.name = pline.data + 1; 535 } 536 const char *p = ret.data.opt.name; 537 const char *const e = pline.data + pline.size; 538 if (e - p >= 4 && p[0] == 't' && p[1] == '_') { 539 ret.data.opt.len = 4; 540 ret.len += 4; 541 } else { 542 for (; p < e && ASCII_ISALPHA(*p); p++) {} 543 ret.data.opt.len = (size_t)(p - ret.data.opt.name); 544 if (ret.data.opt.len == 0) { 545 OPTNAMEMISS(ret); 546 } else { 547 ret.len += ret.data.opt.len; 548 } 549 } 550 break; 551 #undef OPTNAMEMISS 552 } 553 554 // Register. 555 case '@': 556 ret.type = kExprLexRegister; 557 if (pline.size > 1) { 558 ret.len++; 559 ret.data.reg.name = (uint8_t)pline.data[1]; 560 } else { 561 ret.data.reg.name = -1; 562 } 563 break; 564 565 // Single quoted string. 
566 case '\'': 567 ret.type = kExprLexSingleQuotedString; 568 ret.data.str.closed = false; 569 for (; ret.len < pline.size && !ret.data.str.closed; ret.len++) { 570 if (pline.data[ret.len] == '\'') { 571 if (ret.len + 1 < pline.size && pline.data[ret.len + 1] == '\'') { 572 ret.len++; 573 } else { 574 ret.data.str.closed = true; 575 } 576 } 577 } 578 break; 579 580 // Double quoted string. 581 case '"': 582 ret.type = kExprLexDoubleQuotedString; 583 ret.data.str.closed = false; 584 for (; ret.len < pline.size && !ret.data.str.closed; ret.len++) { 585 if (pline.data[ret.len] == '\\') { 586 if (ret.len + 1 < pline.size) { 587 ret.len++; 588 } 589 } else if (pline.data[ret.len] == '"') { 590 ret.data.str.closed = true; 591 } 592 } 593 break; 594 595 // Unary not, (un)equality and regex (not) match comparison operators. 596 case '!': 597 case '=': 598 if (pline.size == 1) { 599 ret.type = (schar == '!' ? kExprLexNot : kExprLexAssignment); 600 ret.data.ass.type = kExprAsgnPlain; 601 break; 602 } 603 ret.type = kExprLexComparison; 604 ret.data.cmp.inv = (schar == '!'); 605 if (pline.data[1] == '=') { 606 ret.data.cmp.type = kExprCmpEqual; 607 ret.len++; 608 } else if (pline.data[1] == '~') { 609 ret.data.cmp.type = kExprCmpMatches; 610 ret.len++; 611 } else if (schar == '!') { 612 ret.type = kExprLexNot; 613 } else { 614 ret.type = kExprLexAssignment; 615 ret.data.ass.type = kExprAsgnPlain; 616 } 617 GET_CCS(ret, pline); 618 break; 619 620 // Less/greater [or equal to] comparison operators. 621 case '>': 622 case '<': { 623 ret.type = kExprLexComparison; 624 const bool haseqsign = (pline.size > 1 && pline.data[1] == '='); 625 if (haseqsign) { 626 ret.len++; 627 } 628 GET_CCS(ret, pline); 629 ret.data.cmp.inv = (schar == '<'); 630 ret.data.cmp.type = ((ret.data.cmp.inv ^ haseqsign) 631 ? kExprCmpGreaterOrEqual 632 : kExprCmpGreater); 633 break; 634 } 635 636 // Minus sign, arrow from lambdas or augmented assignment. 
637 case '-': { 638 if (pline.size > 1 && pline.data[1] == '>') { 639 ret.len++; 640 ret.type = kExprLexArrow; 641 } else if (pline.size > 1 && pline.data[1] == '=') { 642 ret.len++; 643 ret.type = kExprLexAssignment; 644 ret.data.ass.type = kExprAsgnSubtract; 645 } else { 646 ret.type = kExprLexMinus; 647 } 648 break; 649 } 650 651 // Sign or augmented assignment. 652 #define CHAR_OR_ASSIGN(ch, ch_type, ass_type) \ 653 case ch: { \ 654 if (pline.size > 1 && pline.data[1] == '=') { \ 655 ret.len++; \ 656 ret.type = kExprLexAssignment; \ 657 ret.data.ass.type = ass_type; \ 658 } else { \ 659 ret.type = ch_type; \ 660 } \ 661 break; \ 662 } 663 CHAR_OR_ASSIGN('+', kExprLexPlus, kExprAsgnAdd) 664 CHAR_OR_ASSIGN('.', kExprLexDot, kExprAsgnConcat) 665 #undef CHAR_OR_ASSIGN 666 667 // Expression end because Ex command ended. 668 case NUL: 669 case NL: 670 if (flags & kELFlagForbidEOC) { 671 ret.type = kExprLexInvalid; 672 ret.data.err.msg = _("E15: Unexpected EOC character: %.*s"); 673 ret.data.err.type = kExprLexSpacing; 674 } else { 675 ret.type = kExprLexEOC; 676 } 677 break; 678 679 case '|': 680 if (pline.size >= 2 && pline.data[ret.len] == '|') { 681 // "||" is or. 682 ret.len++; 683 ret.type = kExprLexOr; 684 } else if (flags & kELFlagForbidEOC) { 685 // Note: `<C-r>=1 | 2<CR>` actually yields 1 in Vim without any 686 // errors. This will be changed here. 687 ret.type = kExprLexInvalid; 688 ret.data.err.msg = _("E15: Unexpected EOC character: %.*s"); 689 ret.data.err.type = kExprLexOr; 690 } else { 691 ret.type = kExprLexEOC; 692 } 693 break; 694 695 // Everything else is not valid. 
696 default: 697 ret.len = (size_t)utfc_ptr2len_len(pline.data, (int)pline.size); 698 ret.type = kExprLexInvalid; 699 ret.data.err.type = kExprLexPlainIdentifier; 700 ret.data.err.msg = _("E15: Unidentified character: %.*s"); 701 break; 702 } 703 #undef GET_CCS 704 viml_pexpr_next_token_adv_return: 705 if (!(flags & kELFlagPeek)) { 706 viml_parser_advance(pstate, ret.len); 707 } 708 return ret; 709 } 710 711 static const char *const eltkn_type_tab[] = { 712 [kExprLexInvalid] = "Invalid", 713 [kExprLexMissing] = "Missing", 714 [kExprLexSpacing] = "Spacing", 715 [kExprLexEOC] = "EOC", 716 717 [kExprLexQuestion] = "Question", 718 [kExprLexColon] = "Colon", 719 [kExprLexOr] = "Or", 720 [kExprLexAnd] = "And", 721 [kExprLexComparison] = "Comparison", 722 [kExprLexPlus] = "Plus", 723 [kExprLexMinus] = "Minus", 724 [kExprLexDot] = "Dot", 725 [kExprLexMultiplication] = "Multiplication", 726 727 [kExprLexNot] = "Not", 728 729 [kExprLexNumber] = "Number", 730 [kExprLexSingleQuotedString] = "SingleQuotedString", 731 [kExprLexDoubleQuotedString] = "DoubleQuotedString", 732 [kExprLexOption] = "Option", 733 [kExprLexRegister] = "Register", 734 [kExprLexEnv] = "Env", 735 [kExprLexPlainIdentifier] = "PlainIdentifier", 736 737 [kExprLexBracket] = "Bracket", 738 [kExprLexFigureBrace] = "FigureBrace", 739 [kExprLexParenthesis] = "Parenthesis", 740 [kExprLexComma] = "Comma", 741 [kExprLexArrow] = "Arrow", 742 [kExprLexAssignment] = "Assignment", 743 }; 744 745 const char *const eltkn_cmp_type_tab[] = { 746 [kExprCmpEqual] = "Equal", 747 [kExprCmpMatches] = "Matches", 748 [kExprCmpGreater] = "Greater", 749 [kExprCmpGreaterOrEqual] = "GreaterOrEqual", 750 [kExprCmpIdentical] = "Identical", 751 }; 752 753 const char *const expr_asgn_type_tab[] = { 754 [kExprAsgnPlain] = "Plain", 755 [kExprAsgnAdd] = "Add", 756 [kExprAsgnSubtract] = "Subtract", 757 [kExprAsgnConcat] = "Concat", 758 }; 759 760 const char *const ccs_tab[] = { 761 [kCCStrategyUseOption] = "UseOption", 762 
[kCCStrategyMatchCase] = "MatchCase", 763 [kCCStrategyIgnoreCase] = "IgnoreCase", 764 }; 765 766 static const char *const eltkn_mul_type_tab[] = { 767 [kExprLexMulMul] = "Mul", 768 [kExprLexMulDiv] = "Div", 769 [kExprLexMulMod] = "Mod", 770 }; 771 772 static const char *const eltkn_opt_scope_tab[] = { 773 [kExprOptScopeUnspecified] = "Unspecified", 774 [kExprOptScopeGlobal] = "Global", 775 [kExprOptScopeLocal] = "Local", 776 }; 777 778 /// Represent token as a string 779 /// 780 /// Intended for testing and debugging purposes. 781 /// 782 /// @param[in] pstate Parser state, needed to get token string from it. May be 783 /// NULL, in which case in place of obtaining part of the 784 /// string represented by token only token length is 785 /// returned. 786 /// @param[in] token Token to represent. 787 /// @param[out] ret_size Return string size, for cases like NULs inside 788 /// a string. May be NULL. 789 /// 790 /// @return Token represented in a string form, in a static buffer (overwritten 791 /// on each call). 792 const char *viml_pexpr_repr_token(const ParserState *const pstate, const LexExprToken token, 793 size_t *const ret_size) 794 FUNC_ATTR_WARN_UNUSED_RESULT 795 { 796 static char ret[1024]; 797 char *p = ret; 798 const char *const e = &ret[1024] - 1; 799 #define ADDSTR(...) \ 800 do { \ 801 p += snprintf(p, (size_t)(sizeof(ret) - (size_t)(p - ret)), __VA_ARGS__); \ 802 if (p >= e) { \ 803 goto viml_pexpr_repr_token_end; \ 804 } \ 805 } while (0) 806 ADDSTR("%zu:%zu:%s", token.start.line, token.start.col, 807 eltkn_type_tab[token.type]); 808 switch (token.type) { 809 #define TKNARGS(tkn_type, ...) 
\ 810 case tkn_type: { \ 811 ADDSTR(__VA_ARGS__); \ 812 break; \ 813 } 814 TKNARGS(kExprLexComparison, "(type=%s,ccs=%s,inv=%i)", 815 eltkn_cmp_type_tab[token.data.cmp.type], 816 ccs_tab[token.data.cmp.ccs], 817 (int)token.data.cmp.inv) 818 TKNARGS(kExprLexMultiplication, "(type=%s)", 819 eltkn_mul_type_tab[token.data.mul.type]) 820 TKNARGS(kExprLexAssignment, "(type=%s)", 821 expr_asgn_type_tab[token.data.ass.type]) 822 TKNARGS(kExprLexRegister, "(name=%s)", intchar2str(token.data.reg.name)) 823 case kExprLexDoubleQuotedString: 824 TKNARGS(kExprLexSingleQuotedString, "(closed=%i)", 825 (int)token.data.str.closed) 826 TKNARGS(kExprLexOption, "(scope=%s,name=%.*s)", 827 eltkn_opt_scope_tab[token.data.opt.scope], 828 (int)token.data.opt.len, token.data.opt.name) 829 TKNARGS(kExprLexPlainIdentifier, "(scope=%s,autoload=%i)", 830 intchar2str((int)token.data.var.scope), 831 (int)token.data.var.autoload) 832 TKNARGS(kExprLexNumber, "(is_float=%i,base=%i,val=%lg)", 833 (int)token.data.num.is_float, 834 (int)token.data.num.base, 835 (double)(token.data.num.is_float 836 ? (double)token.data.num.val.floating 837 : (double)token.data.num.val.integer)) 838 TKNARGS(kExprLexInvalid, "(msg=%s)", token.data.err.msg) 839 default: 840 // No additional arguments. 
841 break; 842 #undef TKNARGS 843 } 844 if (pstate == NULL) { 845 ADDSTR("::%zu", token.len); 846 } else { 847 *p++ = ':'; 848 memmove(p, &pstate->reader.lines.items[token.start.line].data[token.start.col], 849 token.len); 850 p += token.len; 851 *p = NUL; 852 } 853 #undef ADDSTR 854 viml_pexpr_repr_token_end: 855 if (ret_size != NULL) { 856 *ret_size = (size_t)(p - ret); 857 } 858 return ret; 859 } 860 861 const char *const east_node_type_tab[] = { 862 [kExprNodeMissing] = "Missing", 863 [kExprNodeOpMissing] = "OpMissing", 864 [kExprNodeTernary] = "Ternary", 865 [kExprNodeTernaryValue] = "TernaryValue", 866 [kExprNodeRegister] = "Register", 867 [kExprNodeSubscript] = "Subscript", 868 [kExprNodeListLiteral] = "ListLiteral", 869 [kExprNodeUnaryPlus] = "UnaryPlus", 870 [kExprNodeBinaryPlus] = "BinaryPlus", 871 [kExprNodeNested] = "Nested", 872 [kExprNodeCall] = "Call", 873 [kExprNodePlainIdentifier] = "PlainIdentifier", 874 [kExprNodePlainKey] = "PlainKey", 875 [kExprNodeComplexIdentifier] = "ComplexIdentifier", 876 [kExprNodeUnknownFigure] = "UnknownFigure", 877 [kExprNodeLambda] = "Lambda", 878 [kExprNodeDictLiteral] = "DictLiteral", 879 [kExprNodeCurlyBracesIdentifier] = "CurlyBracesIdentifier", 880 [kExprNodeComma] = "Comma", 881 [kExprNodeColon] = "Colon", 882 [kExprNodeArrow] = "Arrow", 883 [kExprNodeComparison] = "Comparison", 884 [kExprNodeConcat] = "Concat", 885 [kExprNodeConcatOrSubscript] = "ConcatOrSubscript", 886 [kExprNodeInteger] = "Integer", 887 [kExprNodeFloat] = "Float", 888 [kExprNodeSingleQuotedString] = "SingleQuotedString", 889 [kExprNodeDoubleQuotedString] = "DoubleQuotedString", 890 [kExprNodeOr] = "Or", 891 [kExprNodeAnd] = "And", 892 [kExprNodeUnaryMinus] = "UnaryMinus", 893 [kExprNodeBinaryMinus] = "BinaryMinus", 894 [kExprNodeNot] = "Not", 895 [kExprNodeMultiplication] = "Multiplication", 896 [kExprNodeDivision] = "Division", 897 [kExprNodeMod] = "Mod", 898 [kExprNodeOption] = "Option", 899 [kExprNodeEnvironment] = "Environment", 900 
[kExprNodeAssignment] = "Assignment",
};

/// Represent `int` character as a string
///
/// Converts
/// - ASCII digits into '{digit}' (the digit enclosed in single quotes),
/// - other ASCII printable characters (from space up to, but not including,
///   0x7f) into single-character strings,
/// - everything else into decimal numbers.
///
/// @param[in]  ch  Character to convert.
///
/// @return Converted string, stored in a static buffer (overwritten by each
///         call).
static const char *intchar2str(const int ch)
  FUNC_ATTR_WARN_UNUSED_RESULT
{
  static char buf[sizeof(int) * 3 + 1];
  if (' ' <= ch && ch < 0x7f) {
    if (ascii_isdigit(ch)) {
      // Quoted so that digit characters differ from the numeric fallback below.
      buf[0] = '\'';
      buf[1] = (char)ch;
      buf[2] = '\'';
      buf[3] = NUL;
    } else {
      buf[0] = (char)ch;
      buf[1] = NUL;
    }
  } else {
    snprintf(buf, sizeof(buf), "%i", ch);
  }
  return buf;
}

#ifdef UNIT_TESTING

/// Print one AST node slot to stderr
///
/// Prints prefix and slot address; for non-NULL nodes also node address, type
/// name, start position and length.
///
/// @param[in]  eastnode_p  Pointer to the node pointer to print.
/// @param[in]  prefix  String printed at the start of the line.
REAL_FATTR_UNUSED
static inline void viml_pexpr_debug_print_ast_node(const ExprASTNode *const *const eastnode_p,
                                                   const char *const prefix)
{
  if (*eastnode_p == NULL) {
    fprintf(stderr, "%s %p : NULL\n", prefix, (void *)eastnode_p);
  } else {
    fprintf(stderr, "%s %p : %p : %s : %zu:%zu:%zu\n",
            prefix, (void *)eastnode_p, (void *)(*eastnode_p),
            east_node_type_tab[(*eastnode_p)->type], (*eastnode_p)->start.line,
            (*eastnode_p)->start.col, (*eastnode_p)->len);
  }
}

/// Print the whole AST stack to stderr, one node slot per line
///
/// @param[in]  ast_stack  Stack to print.
/// @param[in]  msg  String printed before the stack size.
REAL_FATTR_UNUSED
static inline void viml_pexpr_debug_print_ast_stack(const ExprASTStack *const ast_stack,
                                                    const char *const msg)
  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE
{
  fprintf(stderr, "\n%sstack: %zu:\n", msg, kv_size(*ast_stack));
  for (size_t i = 0; i < kv_size(*ast_stack); i++) {
    viml_pexpr_debug_print_ast_node((const ExprASTNode *const *)kv_A(*ast_stack, i),
                                    "-");
  }
}

/// Print token representation, as returned by viml_pexpr_repr_token(), to
/// stderr
///
/// @param[in]  pstate  Parser state, passed to viml_pexpr_repr_token().
/// @param[in]  token  Token to print.
REAL_FATTR_UNUSED
static inline void viml_pexpr_debug_print_token(const ParserState *const pstate,
                                                const LexExprToken token)
  FUNC_ATTR_ALWAYS_INLINE
{
  fprintf(stderr, "\ntkn: %s\n", viml_pexpr_repr_token(pstate, token, NULL));
}
// Debugging shortcuts. `msg` is stringized, so it is written without quotes.
// PSTACK expects a variable `ast_stack` holding the stack itself, PSTACK_P
// expects `ast_stack` to be a pointer to it, PTOKEN expects `pstate`.
# define PSTACK(msg) \
  viml_pexpr_debug_print_ast_stack(&ast_stack, #msg)
# define PSTACK_P(msg) \
  viml_pexpr_debug_print_ast_stack(ast_stack, #msg)
# define PNODE_P(eastnode_p, msg) \
  viml_pexpr_debug_print_ast_node((const ExprASTNode *const *)eastnode_p, \
                                  (#msg))
# define PTOKEN(tkn) \
  viml_pexpr_debug_print_token(pstate, tkn)
#endif

/// Maximum number of children a node of the given type may have
///
/// Indexed by kExprNode* values. viml_pexpr_free_ast() checks this table in
/// its debug-only assertions.
const uint8_t node_maxchildren[] = {
  [kExprNodeMissing] = 0,
  [kExprNodeOpMissing] = 2,
  [kExprNodeTernary] = 2,
  [kExprNodeTernaryValue] = 2,
  [kExprNodeRegister] = 0,
  [kExprNodeSubscript] = 2,
  [kExprNodeListLiteral] = 1,
  [kExprNodeUnaryPlus] = 1,
  [kExprNodeBinaryPlus] = 2,
  [kExprNodeNested] = 1,
  [kExprNodeCall] = 2,
  [kExprNodePlainIdentifier] = 0,
  [kExprNodePlainKey] = 0,
  [kExprNodeComplexIdentifier] = 2,
  [kExprNodeUnknownFigure] = 1,
  [kExprNodeLambda] = 2,
  [kExprNodeDictLiteral] = 1,
  [kExprNodeCurlyBracesIdentifier] = 1,
  [kExprNodeComma] = 2,
  [kExprNodeColon] = 2,
  [kExprNodeArrow] = 2,
  [kExprNodeComparison] = 2,
  [kExprNodeConcat] = 2,
  [kExprNodeConcatOrSubscript] = 2,
  [kExprNodeInteger] = 0,
  [kExprNodeFloat] = 0,
  [kExprNodeSingleQuotedString] = 0,
  [kExprNodeDoubleQuotedString] = 0,
  [kExprNodeOr] = 2,
  [kExprNodeAnd] = 2,
  [kExprNodeUnaryMinus] = 1,
  [kExprNodeBinaryMinus] = 2,
  [kExprNodeNot] = 1,
  [kExprNodeMultiplication] = 2,
  [kExprNodeDivision] = 2,
  [kExprNodeMod] = 2,
  [kExprNodeOption] = 0,
  [kExprNodeEnvironment] = 0,
  [kExprNodeAssignment] = 2,
};

/// Free memory occupied by AST
///
/// Frees every node reachable from the root through `children` and `next`
/// pointers, as well as string values owned by string nodes.
///
/// @param  ast  AST to free.
void viml_pexpr_free_ast(ExprAST ast)
{
  // Iterative traversal with an explicit stack of node slots (pointers to
  // node pointers). A node is freed only once it has neither children nor
  // siblings left; its slot is then set to NULL, which makes the node owning
  // that slot eligible for freeing on a later iteration.
  ExprASTStack ast_stack;
  kvi_init(ast_stack);
  kvi_push(ast_stack, &ast.root);
  while (kv_size(ast_stack)) {
    ExprASTNode **const cur_node = kv_last(ast_stack);
#ifndef NDEBUG
    // Explicitly check for AST recursiveness.
    for (size_t i = 0; i < kv_size(ast_stack) - 1; i++) {
      assert(*kv_A(ast_stack, i) != *cur_node);
    }
#endif
    if (*cur_node == NULL) {
      // Only the root slot may be NULL when found on the stack: other slots
      // are pushed only after being checked for non-NULL below.
      assert(kv_size(ast_stack) == 1);
      kv_drop(ast_stack, 1);
    } else if ((*cur_node)->children != NULL) {
#ifndef NDEBUG
      const uint8_t maxchildren = node_maxchildren[(*cur_node)->type];
      assert(maxchildren > 0);
      assert(maxchildren <= 2);
      assert(maxchildren == 1
             ? (*cur_node)->children->next == NULL
             : ((*cur_node)->children->next == NULL
                || (*cur_node)->children->next->next == NULL));
#endif
      // Descend: children have to be freed first.
      kvi_push(ast_stack, &(*cur_node)->children);
    } else if ((*cur_node)->next != NULL) {
      // No children left, but there is a sibling: free it first.
      kvi_push(ast_stack, &(*cur_node)->next);
    } else if (*cur_node != NULL) {
      // Leaf without siblings: free type-specific data, then the node itself.
      kv_drop(ast_stack, 1);
      switch ((*cur_node)->type) {
      case kExprNodeDoubleQuotedString:
      case kExprNodeSingleQuotedString:
        xfree((*cur_node)->data.str.value);
        break;
      // All remaining node types are listed explicitly and free nothing here.
      case kExprNodeMissing:
      case kExprNodeOpMissing:
      case kExprNodeTernary:
      case kExprNodeTernaryValue:
      case kExprNodeRegister:
      case kExprNodeSubscript:
      case kExprNodeListLiteral:
      case kExprNodeUnaryPlus:
      case kExprNodeBinaryPlus:
      case kExprNodeNested:
      case kExprNodeCall:
      case kExprNodePlainIdentifier:
      case kExprNodePlainKey:
      case kExprNodeComplexIdentifier:
      case kExprNodeUnknownFigure:
      case kExprNodeLambda:
      case kExprNodeDictLiteral:
      case kExprNodeCurlyBracesIdentifier:
      case kExprNodeAssignment:
      case kExprNodeComma:
      case kExprNodeColon:
      case kExprNodeArrow:
      case kExprNodeComparison:
      case kExprNodeConcat:
      case kExprNodeConcatOrSubscript:
      case kExprNodeInteger:
      case kExprNodeFloat:
      case kExprNodeOr:
      case kExprNodeAnd:
      case kExprNodeUnaryMinus:
      case kExprNodeBinaryMinus:
      case kExprNodeNot:
      case kExprNodeMultiplication:
      case kExprNodeDivision:
      case kExprNodeMod:
      case kExprNodeOption:
      case kExprNodeEnvironment:
        break;
      }
      xfree(*cur_node);
      // Clear the slot so that the node owning it no longer sees this node.
      *cur_node = NULL;
    }
  }
  kvi_destroy(ast_stack);
}

// Binary operator precedence and associativity:
//
// Operator | Precedence | Associativity
// ---------+------------+-----------------
// ||       | 2          | left
// &&       | 3          | left
// cmp*     | 4          | not associative
// + - .    | 5          | left
// * / %    | 6          | left
//
// * comparison operators:
//
// == ==# ==?  != !=# !=?
// =~ =~# =~?  !~ !~# !~?
//  >  >#  >?  <= <=# <=?
//  <  <#  <?  >= >=# >=?
// is is# is?  isnot isnot# isnot?

/// Allocate a new node and set some of the values
///
/// Only `type`, `children` and `next` are set; all other fields of the returned
/// node are left uninitialized.
///
/// @param[in]  type  Node type to allocate.
///
/// @return Newly allocated node with no children and no siblings.
static inline ExprASTNode *viml_pexpr_new_node(const ExprASTNodeType type)
  FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_MALLOC
{
  ExprASTNode *ret = xmalloc(sizeof(*ret));
  ret->type = type;
  ret->children = NULL;
  ret->next = NULL;
  return ret;
}

// Operator level and associativity for each node type, indexed by kExprNode*
// values.
static struct {
  ExprOpLvl lvl;
  ExprOpAssociativity ass;
} node_type_to_node_props[] = {
  [kExprNodeMissing] = { kEOpLvlInvalid, kEOpAssNo, },
  [kExprNodeOpMissing] = { kEOpLvlMultiplication, kEOpAssNo },

  [kExprNodeNested] = { kEOpLvlParens, kEOpAssNo },
  // Note: below nodes are kEOpLvlSubscript for “binary operator” itself, but
  //       kEOpLvlParens when it comes to inside the parenthesis.
1149 [kExprNodeCall] = { kEOpLvlParens, kEOpAssNo }, 1150 [kExprNodeSubscript] = { kEOpLvlParens, kEOpAssNo }, 1151 1152 [kExprNodeUnknownFigure] = { kEOpLvlParens, kEOpAssLeft }, 1153 [kExprNodeLambda] = { kEOpLvlParens, kEOpAssNo }, 1154 [kExprNodeDictLiteral] = { kEOpLvlParens, kEOpAssNo }, 1155 [kExprNodeListLiteral] = { kEOpLvlParens, kEOpAssNo }, 1156 1157 [kExprNodeArrow] = { kEOpLvlArrow, kEOpAssNo }, 1158 1159 // Right associativity for comma because this means easier access to arguments 1160 // list, etc: for "[a, b, c, d]" you can access "a" in one step if it is 1161 // represented as "list(comma(a, comma(b, comma(c, d))))" then if it is 1162 // "list(comma(comma(comma(a, b), c), d))" in which case you will need to 1163 // traverse all three comma() structures. And with comma operator (including 1164 // actual comma operator from C which is not present in Vimscript) nobody cares 1165 // about associativity, only about order of execution. 1166 [kExprNodeComma] = { kEOpLvlComma, kEOpAssRight }, 1167 1168 // Colons are not eligible for chaining, so nobody cares about associativity. 
1169 [kExprNodeColon] = { kEOpLvlColon, kEOpAssNo }, 1170 1171 [kExprNodeTernary] = { kEOpLvlTernary, kEOpAssRight }, 1172 1173 [kExprNodeOr] = { kEOpLvlOr, kEOpAssLeft }, 1174 1175 [kExprNodeAnd] = { kEOpLvlAnd, kEOpAssLeft }, 1176 1177 [kExprNodeTernaryValue] = { kEOpLvlTernaryValue, kEOpAssRight }, 1178 1179 [kExprNodeComparison] = { kEOpLvlComparison, kEOpAssRight }, 1180 1181 [kExprNodeBinaryPlus] = { kEOpLvlAddition, kEOpAssLeft }, 1182 [kExprNodeBinaryMinus] = { kEOpLvlAddition, kEOpAssLeft }, 1183 [kExprNodeConcat] = { kEOpLvlAddition, kEOpAssLeft }, 1184 1185 [kExprNodeMultiplication] = { kEOpLvlMultiplication, kEOpAssLeft }, 1186 [kExprNodeDivision] = { kEOpLvlMultiplication, kEOpAssLeft }, 1187 [kExprNodeMod] = { kEOpLvlMultiplication, kEOpAssLeft }, 1188 1189 [kExprNodeUnaryPlus] = { kEOpLvlUnary, kEOpAssNo }, 1190 [kExprNodeUnaryMinus] = { kEOpLvlUnary, kEOpAssNo }, 1191 [kExprNodeNot] = { kEOpLvlUnary, kEOpAssNo }, 1192 1193 [kExprNodeConcatOrSubscript] = { kEOpLvlSubscript, kEOpAssLeft }, 1194 1195 [kExprNodeCurlyBracesIdentifier] = { kEOpLvlComplexIdentifier, kEOpAssLeft }, 1196 1197 [kExprNodeAssignment] = { kEOpLvlAssignment, kEOpAssLeft }, 1198 1199 [kExprNodeComplexIdentifier] = { kEOpLvlValue, kEOpAssLeft }, 1200 1201 [kExprNodePlainIdentifier] = { kEOpLvlValue, kEOpAssNo }, 1202 [kExprNodePlainKey] = { kEOpLvlValue, kEOpAssNo }, 1203 [kExprNodeRegister] = { kEOpLvlValue, kEOpAssNo }, 1204 [kExprNodeInteger] = { kEOpLvlValue, kEOpAssNo }, 1205 [kExprNodeFloat] = { kEOpLvlValue, kEOpAssNo }, 1206 [kExprNodeDoubleQuotedString] = { kEOpLvlValue, kEOpAssNo }, 1207 [kExprNodeSingleQuotedString] = { kEOpLvlValue, kEOpAssNo }, 1208 [kExprNodeOption] = { kEOpLvlValue, kEOpAssNo }, 1209 [kExprNodeEnvironment] = { kEOpLvlValue, kEOpAssNo }, 1210 }; 1211 1212 /// Get AST node priority level 1213 /// 1214 /// Used primary to reduce line length, so keep the name short. 1215 /// 1216 /// @param[in] node Node to get priority for. 
1217 /// 1218 /// @return Node priority level. 1219 static inline ExprOpLvl node_lvl(const ExprASTNode node) 1220 FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT 1221 { 1222 return node_type_to_node_props[node.type].lvl; 1223 } 1224 1225 /// Get AST node associativity, to be used for operator nodes primary 1226 /// 1227 /// Used primary to reduce line length, so keep the name short. 1228 /// 1229 /// @param[in] node Node to get priority for. 1230 /// 1231 /// @return Node associativity. 1232 static inline ExprOpAssociativity node_ass(const ExprASTNode node) 1233 FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT 1234 { 1235 return node_type_to_node_props[node.type].ass; 1236 } 1237 1238 /// Handle binary operator 1239 /// 1240 /// This function is responsible for handling priority levels as well. 1241 /// 1242 /// @param[in] pstate Parser state, used for error reporting. 1243 /// @param ast_stack AST stack. May be popped of some values and will 1244 /// definitely receive new ones. 1245 /// @param bop_node New node to handle. 1246 /// @param[out] want_node_p New value of want_node. 1247 /// @param[out] ast_err Location where error is saved, if any. 1248 /// 1249 /// @return True if no errors occurred, false otherwise. 1250 static bool viml_pexpr_handle_bop(const ParserState *const pstate, ExprASTStack *const ast_stack, 1251 ExprASTNode *const bop_node, ExprASTWantedNode *const want_node_p, 1252 ExprASTError *const ast_err) 1253 FUNC_ATTR_NONNULL_ALL 1254 { 1255 bool ret = true; 1256 ExprASTNode **top_node_p = NULL; 1257 ExprASTNode *top_node; 1258 ExprOpLvl top_node_lvl; 1259 ExprOpAssociativity top_node_ass; 1260 assert(kv_size(*ast_stack)); 1261 const ExprOpLvl bop_node_lvl = ((bop_node->type == kExprNodeCall 1262 || bop_node->type == kExprNodeSubscript) 1263 ? 
kEOpLvlSubscript 1264 : node_lvl(*bop_node)); 1265 do { 1266 ExprASTNode **new_top_node_p = kv_last(*ast_stack); 1267 ExprASTNode *new_top_node = *new_top_node_p; 1268 assert(new_top_node != NULL); 1269 const ExprOpLvl new_top_node_lvl = node_lvl(*new_top_node); 1270 const ExprOpAssociativity new_top_node_ass = node_ass(*new_top_node); 1271 if (top_node_p != NULL 1272 && ((bop_node_lvl > new_top_node_lvl 1273 || (bop_node_lvl == new_top_node_lvl 1274 && new_top_node_ass == kEOpAssNo)))) { 1275 break; 1276 } 1277 kv_drop(*ast_stack, 1); 1278 top_node_p = new_top_node_p; 1279 top_node = new_top_node; 1280 top_node_lvl = new_top_node_lvl; 1281 top_node_ass = new_top_node_ass; 1282 if (bop_node_lvl == top_node_lvl && top_node_ass == kEOpAssRight) { 1283 break; 1284 } 1285 } while (kv_size(*ast_stack)); 1286 if (top_node_ass == kEOpAssLeft || top_node_lvl != bop_node_lvl) { 1287 // outer(op(x,y)) -> outer(new_op(op(x,y),*)) 1288 // 1289 // Before: top_node_p = outer(*), points to op(x,y) 1290 // Other stack elements unknown 1291 // 1292 // After: top_node_p = outer(*), points to new_op(op(x,y)) 1293 // &bop_node->children->next = new_op(op(x,y),*), points to NULL 1294 *top_node_p = bop_node; 1295 bop_node->children = top_node; 1296 assert(bop_node->children->next == NULL); 1297 kvi_push(*ast_stack, top_node_p); 1298 kvi_push(*ast_stack, &bop_node->children->next); 1299 } else { 1300 assert(top_node_lvl == bop_node_lvl && top_node_ass == kEOpAssRight); 1301 assert(top_node->children != NULL && top_node->children->next != NULL); 1302 // outer(op(x,y)) -> outer(op(x,new_op(y,*))) 1303 // 1304 // Before: top_node_p = outer(*), points to op(x,y) 1305 // Other stack elements unknown 1306 // 1307 // After: top_node_p = outer(*), points to op(x,new_op(y)) 1308 // &top_node->children->next = op(x,*), points to new_op(y) 1309 // &bop_node->children->next = new_op(y,*), points to NULL 1310 bop_node->children = top_node->children->next; 1311 top_node->children->next = bop_node; 
1312 assert(bop_node->children->next == NULL); 1313 kvi_push(*ast_stack, top_node_p); 1314 kvi_push(*ast_stack, &top_node->children->next); 1315 kvi_push(*ast_stack, &bop_node->children->next); 1316 // TODO(ZyX-I): Make this not error, but treat like Python does 1317 if (bop_node->type == kExprNodeComparison) { 1318 east_set_error(pstate, ast_err, 1319 _("E15: Operator is not associative: %.*s"), 1320 bop_node->start); 1321 ret = false; 1322 } 1323 } 1324 *want_node_p = kENodeValue; 1325 return ret; 1326 } 1327 1328 /// ParserPosition literal based on ParserPosition pos with columns shifted 1329 /// 1330 /// Function does not check whether resulting position is valid. 1331 /// 1332 /// @param[in] pos Position to shift. 1333 /// @param[in] shift Number of bytes to shift. 1334 /// 1335 /// @return Shifted position. 1336 static inline ParserPosition shifted_pos(const ParserPosition pos, const size_t shift) 1337 FUNC_ATTR_CONST FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT 1338 { 1339 return (ParserPosition) { .line = pos.line, .col = pos.col + shift }; 1340 } 1341 1342 /// ParserPosition literal based on ParserPosition pos with specified column 1343 /// 1344 /// Function does not check whether remaining position is valid. 1345 /// 1346 /// @param[in] pos Position to adjust. 1347 /// @param[in] new_col New column. 1348 /// 1349 /// @return Shifted position. 1350 static inline ParserPosition recol_pos(const ParserPosition pos, const size_t new_col) 1351 FUNC_ATTR_CONST FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT 1352 { 1353 return (ParserPosition) { .line = pos.line, .col = new_col }; 1354 } 1355 1356 /// Get highlight group name 1357 #define HL(g) (is_invalid ? 
"NvimInvalid" #g : "Nvim" #g) 1358 1359 /// Highlight current token with the given group 1360 #define HL_CUR_TOKEN(g) \ 1361 viml_parser_highlight(pstate, cur_token.start, cur_token.len, \ 1362 HL(g)) 1363 1364 /// Allocate new node, saving some values 1365 #define NEW_NODE(type) \ 1366 viml_pexpr_new_node(type) 1367 1368 /// Set position of the given node to position from the given token 1369 /// 1370 /// @param cur_node Node to modify. 1371 /// @param cur_token Token to set position from. 1372 #define POS_FROM_TOKEN(cur_node, cur_token) \ 1373 do { \ 1374 (cur_node)->start = cur_token.start; \ 1375 (cur_node)->len = cur_token.len; \ 1376 } while (0) 1377 1378 /// Allocate new node and set its position from the current token 1379 /// 1380 /// If previous token happened to contain spacing then it will be included. 1381 /// 1382 /// @param cur_node Variable to save allocated node to. 1383 /// @param typ Node type. 1384 #define NEW_NODE_WITH_CUR_POS(cur_node, typ) \ 1385 do { \ 1386 (cur_node) = NEW_NODE(typ); \ 1387 POS_FROM_TOKEN((cur_node), cur_token); \ 1388 if (prev_token.type == kExprLexSpacing) { \ 1389 (cur_node)->start = prev_token.start; \ 1390 (cur_node)->len += prev_token.len; \ 1391 } \ 1392 } while (0) 1393 1394 /// Check whether it is possible to have next expression after current 1395 /// 1396 /// For :echo: `:echo @a @a` is a valid expression. `:echo (@a @a)` is not. 1397 #define MAY_HAVE_NEXT_EXPR \ 1398 (kv_size(ast_stack) == 1) 1399 1400 /// Add operator node 1401 /// 1402 /// @param[in] cur_node Node to add. 1403 #define ADD_OP_NODE(cur_node) \ 1404 is_invalid |= !viml_pexpr_handle_bop(pstate, &ast_stack, cur_node, \ 1405 &want_node, &ast.err) 1406 1407 /// Record missing operator: for things like 1408 /// 1409 /// :echo @a @a 1410 /// 1411 /// (allowed) or 1412 /// 1413 /// :echo (@a @a) 1414 /// 1415 /// (parsed as OpMissing(@a, @a)). 
1416 #define OP_MISSING \ 1417 do { \ 1418 if (flags & kExprFlagsMulti && MAY_HAVE_NEXT_EXPR) { \ 1419 /* Multiple expressions allowed, return without calling */ \ 1420 /* viml_parser_advance(). */ \ 1421 goto viml_pexpr_parse_end; \ 1422 } else { \ 1423 assert(*top_node_p != NULL); \ 1424 ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Missing operator: %.*s")); \ 1425 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeOpMissing); \ 1426 cur_node->len = 0; \ 1427 ADD_OP_NODE(cur_node); \ 1428 goto viml_pexpr_parse_process_token; \ 1429 } \ 1430 } while (0) 1431 1432 /// Record missing value: for things like "* 5" 1433 /// 1434 /// @param[in] msg Error message. 1435 #define ADD_VALUE_IF_MISSING(msg) \ 1436 do { \ 1437 if (want_node == kENodeValue) { \ 1438 ERROR_FROM_TOKEN_AND_MSG(cur_token, (msg)); \ 1439 NEW_NODE_WITH_CUR_POS((*top_node_p), kExprNodeMissing); \ 1440 (*top_node_p)->len = 0; \ 1441 want_node = kENodeOperator; \ 1442 } \ 1443 } while (0) 1444 1445 /// Set AST error, unless AST already is not correct 1446 /// 1447 /// @param[out] ret_ast AST to set error in. 1448 /// @param[in] pstate Parser state, used to get error message argument. 1449 /// @param[in] msg Error message, assumed to be already translated and 1450 /// containing a single %token "%.*s". 1451 /// @param[in] start Position at which error occurred. 1452 static inline void east_set_error(const ParserState *const pstate, ExprASTError *const ret_ast_err, 1453 const char *const msg, const ParserPosition start) 1454 FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE 1455 { 1456 if (ret_ast_err->msg != NULL) { 1457 return; 1458 } 1459 const ParserLine pline = pstate->reader.lines.items[start.line]; 1460 ret_ast_err->msg = msg; 1461 ret_ast_err->arg_len = (int)(pline.size - start.col); 1462 ret_ast_err->arg = pline.data ? 
pline.data + start.col : NULL; 1463 } 1464 1465 /// Set error from the given token and given message 1466 #define ERROR_FROM_TOKEN_AND_MSG(cur_token, msg) \ 1467 do { \ 1468 is_invalid = true; \ 1469 east_set_error(pstate, &ast.err, msg, cur_token.start); \ 1470 } while (0) 1471 1472 /// Like #ERROR_FROM_TOKEN_AND_MSG, but gets position from a node 1473 #define ERROR_FROM_NODE_AND_MSG(node, msg) \ 1474 do { \ 1475 is_invalid = true; \ 1476 east_set_error(pstate, &ast.err, msg, node->start); \ 1477 } while (0) 1478 1479 /// Set error from the given kExprLexInvalid token 1480 #define ERROR_FROM_TOKEN(cur_token) \ 1481 ERROR_FROM_TOKEN_AND_MSG(cur_token, cur_token.data.err.msg) 1482 1483 /// Select figure brace type, altering highlighting as well if needed 1484 /// 1485 /// @param[out] node Node to modify type. 1486 /// @param[in] new_type New type, one of ExprASTNodeType values without 1487 /// kExprNode prefix. 1488 /// @param[in] hl Corresponding highlighting, passed as an argument to #HL. 1489 #define SELECT_FIGURE_BRACE_TYPE(node, new_type, hl) \ 1490 do { \ 1491 ExprASTNode *const node_ = (node); \ 1492 assert(node_->type == kExprNodeUnknownFigure \ 1493 || node_->type == kExprNode##new_type); \ 1494 node_->type = kExprNode##new_type; \ 1495 if (pstate->colors) { \ 1496 kv_A(*pstate->colors, node_->data.fig.opening_hl_idx).group = \ 1497 HL(hl); \ 1498 } \ 1499 } while (0) 1500 1501 /// Add identifier which should constitute complex identifier node 1502 /// 1503 /// This one is to be called only in case want_node is kENodeOperator. 1504 /// 1505 /// @param new_ident_node_code Code used to create a new identifier node and 1506 /// update want_node and ast_stack, without 1507 /// a trailing semicolon. 1508 /// @param hl Highlighting name to use, passed as an argument to #HL. 
1509 #define ADD_IDENT(new_ident_node_code, hl) \ 1510 do { \ 1511 assert(want_node == kENodeOperator); \ 1512 /* Operator: may only be curly braces name, but only under certain */ \ 1513 /* conditions. */ \ 1514 /* First condition is that there is no space before a part of complex */ \ 1515 /* identifier. */ \ 1516 if (prev_token.type == kExprLexSpacing) { \ 1517 OP_MISSING; \ 1518 } \ 1519 switch ((*top_node_p)->type) { \ 1520 /* Second is that previous node is one of the identifiers: */ \ 1521 /* complex, plain, curly braces. */ \ 1522 /* TODO(ZyX-I): Extend syntax to allow ${expr}. This is needed to */ \ 1523 /* handle environment variables like those bash uses for */ \ 1524 /* `export -f`: their names consist not only of alphanumeric */ \ 1525 /* characters. */ \ 1526 case kExprNodeComplexIdentifier: \ 1527 case kExprNodePlainIdentifier: \ 1528 case kExprNodeCurlyBracesIdentifier: { \ 1529 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComplexIdentifier); \ 1530 cur_node->len = 0; \ 1531 cur_node->children = *top_node_p; \ 1532 *top_node_p = cur_node; \ 1533 kvi_push(ast_stack, &cur_node->children->next); \ 1534 ExprASTNode **const new_top_node_p = kv_last(ast_stack); \ 1535 assert(*new_top_node_p == NULL); \ 1536 new_ident_node_code; \ 1537 *new_top_node_p = cur_node; \ 1538 HL_CUR_TOKEN(hl); \ 1539 break; \ 1540 } \ 1541 default: { \ 1542 OP_MISSING; \ 1543 break; \ 1544 } \ 1545 } \ 1546 } while (0) 1547 1548 /// Determine whether given parse type is an assignment 1549 /// 1550 /// @param[in] pt Checked parse type. 1551 /// 1552 /// @return true if parsing an assignment, false otherwise. 1553 static inline bool pt_is_assignment(const ExprASTParseType pt) 1554 FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT 1555 { 1556 return (pt == kEPTAssignment || pt == kEPTSingleAssignment); 1557 } 1558 1559 /// Structure used to define “string shifts” necessary to map string 1560 /// highlighting to actual strings. 
1561 typedef struct { 1562 size_t start; ///< Where special character starts in original string. 1563 size_t orig_len; ///< Length of orininal string (e.g. 4 for "\x80"). 1564 size_t act_len; ///< Length of resulting character(s) (e.g. 1 for "\x80"). 1565 bool escape_not_known; ///< True if escape sequence in original is not known. 1566 } StringShift; 1567 1568 /// Parse and highlight single- or double-quoted string 1569 /// 1570 /// Function is supposed to detect and highlight regular expressions (but does 1571 /// not do now). 1572 /// 1573 /// @param[out] pstate Parser state which also contains a place where 1574 /// highlighting is saved. 1575 /// @param[out] node Node where string parsing results are saved. 1576 /// @param[in] token Token to highlight. 1577 /// @param[in] ast_stack Parser AST stack, used to detect whether current 1578 /// string is a regex. 1579 /// @param[in] is_invalid Whether currently processed token is not valid. 1580 static void parse_quoted_string(ParserState *const pstate, ExprASTNode *const node, 1581 const LexExprToken token, const ExprASTStack *ast_stack, 1582 const bool is_invalid) 1583 FUNC_ATTR_NONNULL_ALL 1584 { 1585 const ParserLine pline = pstate->reader.lines.items[token.start.line]; 1586 const char *const s = pline.data + token.start.col; 1587 const char *const e = s + token.len - token.data.str.closed; 1588 const char *p = s + 1; 1589 const bool is_double = (token.type == kExprLexDoubleQuotedString); 1590 size_t size = token.len - token.data.str.closed - 1; 1591 kvec_withinit_t(StringShift, 16) shifts; 1592 kvi_init(shifts); 1593 if (!is_double) { 1594 viml_parser_highlight(pstate, token.start, 1, HL(SingleQuote)); 1595 while (p < e) { 1596 const char *const chunk_e = memchr(p, '\'', (size_t)(e - p)); 1597 if (chunk_e == NULL) { 1598 break; 1599 } 1600 size--; 1601 p = chunk_e + 2; 1602 if (pstate->colors) { 1603 kvi_push(shifts, ((StringShift) { 1604 .start = token.start.col + (size_t)(chunk_e - s), 1605 .orig_len = 2, 
1606 .act_len = 1, 1607 .escape_not_known = false, 1608 })); 1609 } 1610 } 1611 node->data.str.size = size; 1612 if (size == 0) { 1613 node->data.str.value = NULL; 1614 } else { 1615 char *v_p; 1616 v_p = node->data.str.value = xmallocz(size); 1617 p = s + 1; 1618 while (p < e) { 1619 const char *const chunk_e = memchr(p, '\'', (size_t)(e - p)); 1620 if (chunk_e == NULL) { 1621 memcpy(v_p, p, (size_t)(e - p)); 1622 break; 1623 } 1624 memcpy(v_p, p, (size_t)(chunk_e - p)); 1625 v_p += (size_t)(chunk_e - p) + 1; 1626 v_p[-1] = '\''; 1627 p = chunk_e + 2; 1628 } 1629 } 1630 } else { 1631 viml_parser_highlight(pstate, token.start, 1, HL(DoubleQuote)); 1632 for (p = s + 1; p < e; p++) { 1633 if (*p == '\\' && p + 1 < e) { 1634 p++; 1635 if (p + 1 == e) { 1636 size--; 1637 break; 1638 } 1639 switch (*p) { 1640 // A "\<x>" form occupies at least 4 characters, and produces up to 1641 // to 9 characters (6 for the char and 3 for a modifier): 1642 // reserve space for 5 extra, but do not compute actual length 1643 // just now, it would be costly. 1644 case '<': 1645 size += 5; 1646 break; 1647 // Hexadecimal, always single byte, but at least three bytes each. 1648 case 'x': 1649 case 'X': 1650 size--; 1651 if (ascii_isxdigit(p[1])) { 1652 size--; 1653 if (p + 2 < e && ascii_isxdigit(p[2])) { 1654 size--; 1655 } 1656 } 1657 break; 1658 // Unicode 1659 // 1660 // \uF takes 1 byte which is 2 bytes less then escape sequence. 1661 // \uFF: 2 bytes, 2 bytes less. 1662 // \uFFF: 3 bytes, 2 bytes less. 1663 // \uFFFF: 3 bytes, 3 bytes less. 1664 // \UFFFFF: 4 bytes, 3 bytes less. 1665 // \UFFFFFF: 5 bytes, 3 bytes less. 1666 // \UFFFFFFF: 6 bytes, 3 bytes less. 1667 // \U7FFFFFFF: 6 bytes, 4 bytes less. 1668 case 'u': 1669 case 'U': { 1670 const char *const esc_start = p; 1671 size_t n = (*p == 'u' ? 
4 : 8); 1672 int nr = 0; 1673 p++; 1674 while (p + 1 < e && n-- && ascii_isxdigit(p[1])) { 1675 p++; 1676 nr = (nr << 4) + hex2nr(*p); 1677 } 1678 // Escape length: (esc_start - 1) points to "\\", esc_start to "u" 1679 // or "U", p to the byte after last byte. So escape sequence 1680 // occupies p - (esc_start - 1), but it stands for a utf_char2len 1681 // bytes. 1682 size -= (size_t)((p - (esc_start - 1)) - utf_char2len(nr)); 1683 p--; 1684 break; 1685 } 1686 // Octal, always single byte, but at least two bytes each. 1687 case '0': 1688 case '1': 1689 case '2': 1690 case '3': 1691 case '4': 1692 case '5': 1693 case '6': 1694 case '7': 1695 size--; 1696 p++; 1697 if (*p >= '0' && *p <= '7') { 1698 size--; 1699 p++; 1700 if (p < e && *p >= '0' && *p <= '7') { 1701 size--; 1702 p++; 1703 } 1704 } 1705 break; 1706 default: 1707 size--; 1708 break; 1709 } 1710 } 1711 } 1712 if (size == 0) { 1713 node->data.str.value = NULL; 1714 node->data.str.size = 0; 1715 } else { 1716 char *v_p; 1717 v_p = node->data.str.value = xmalloc(size); 1718 p = s + 1; 1719 while (p < e) { 1720 const char *const chunk_e = memchr(p, '\\', (size_t)(e - p)); 1721 if (chunk_e == NULL) { 1722 memcpy(v_p, p, (size_t)(e - p)); 1723 v_p += e - p; 1724 break; 1725 } 1726 memcpy(v_p, p, (size_t)(chunk_e - p)); 1727 v_p += (size_t)(chunk_e - p); 1728 p = chunk_e + 1; 1729 if (p == e) { 1730 *v_p++ = '\\'; 1731 break; 1732 } 1733 bool is_unknown = false; 1734 const char *const v_p_start = v_p; 1735 switch (*p) { 1736 #define SINGLE_CHAR_ESC(ch, real_ch) \ 1737 case ch: { \ 1738 *v_p++ = real_ch; \ 1739 p++; \ 1740 break; \ 1741 } 1742 SINGLE_CHAR_ESC('b', BS) 1743 SINGLE_CHAR_ESC('e', ESC) 1744 SINGLE_CHAR_ESC('f', FF) 1745 SINGLE_CHAR_ESC('n', NL) 1746 SINGLE_CHAR_ESC('r', CAR) 1747 SINGLE_CHAR_ESC('t', TAB) 1748 SINGLE_CHAR_ESC('"', '"') 1749 SINGLE_CHAR_ESC('\\', '\\') 1750 #undef SINGLE_CHAR_ESC 1751 1752 // Hexadecimal or unicode. 
1753 case 'X': 1754 case 'x': 1755 case 'u': 1756 case 'U': 1757 if (p + 1 < e && ascii_isxdigit(p[1])) { 1758 size_t n; 1759 int nr; 1760 bool is_hex = (*p == 'x' || *p == 'X'); 1761 1762 if (is_hex) { 1763 n = 2; 1764 } else if (*p == 'u') { 1765 n = 4; 1766 } else { 1767 n = 8; 1768 } 1769 nr = 0; 1770 while (p + 1 < e && n-- && ascii_isxdigit(p[1])) { 1771 p++; 1772 nr = (nr << 4) + hex2nr(*p); 1773 } 1774 p++; 1775 if (is_hex) { 1776 *v_p++ = (char)nr; 1777 } else { 1778 v_p += utf_char2bytes(nr, v_p); 1779 } 1780 } else { 1781 is_unknown = true; 1782 *v_p++ = *p; 1783 p++; 1784 } 1785 break; 1786 // Octal: "\1", "\12", "\123". 1787 case '0': 1788 case '1': 1789 case '2': 1790 case '3': 1791 case '4': 1792 case '5': 1793 case '6': 1794 case '7': { 1795 uint8_t ch = (uint8_t)(*p++ - '0'); 1796 if (p < e && *p >= '0' && *p <= '7') { 1797 ch = (uint8_t)((ch << 3) + *p++ - '0'); 1798 if (p < e && *p >= '0' && *p <= '7') { 1799 ch = (uint8_t)((ch << 3) + *p++ - '0'); 1800 } 1801 } 1802 *v_p++ = (char)ch; 1803 break; 1804 } 1805 // Special key, e.g.: "\<C-W>" 1806 case '<': { 1807 int flags = FSK_KEYCODE | FSK_IN_STRING; 1808 1809 if (p[1] != '*') { 1810 flags |= FSK_SIMPLIFY; 1811 } 1812 const size_t special_len = trans_special(&p, (size_t)(e - p), 1813 v_p, flags, false, NULL); 1814 if (special_len != 0) { 1815 v_p += special_len; 1816 } else { 1817 is_unknown = true; 1818 mb_copy_char(&p, &v_p); 1819 } 1820 break; 1821 } 1822 default: 1823 is_unknown = true; 1824 mb_copy_char(&p, &v_p); 1825 break; 1826 } 1827 if (pstate->colors) { 1828 kvi_push(shifts, ((StringShift) { 1829 .start = token.start.col + (size_t)(chunk_e - s), 1830 .orig_len = (size_t)(p - chunk_e), 1831 .act_len = (size_t)(v_p - (char *)v_p_start), 1832 .escape_not_known = is_unknown, 1833 })); 1834 } 1835 } 1836 node->data.str.size = (size_t)(v_p - node->data.str.value); 1837 } 1838 } 1839 if (pstate->colors) { 1840 // TODO(ZyX-I): use ast_stack to determine and highlight regular expressions 1841 
// TODO(ZyX-I): use ast_stack to determine and highlight printf format str 1842 // TODO(ZyX-I): use ast_stack to determine and highlight expression strings 1843 size_t next_col = token.start.col + 1; 1844 const char *const body_str = (is_double 1845 ? HL(DoubleQuotedBody) 1846 : HL(SingleQuotedBody)); 1847 const char *const esc_str = (is_double 1848 ? HL(DoubleQuotedEscape) 1849 : HL(SingleQuotedQuote)); 1850 const char *const ukn_esc_str = (is_double 1851 ? HL(DoubleQuotedUnknownEscape) 1852 : HL(SingleQuotedUnknownEscape)); 1853 for (size_t i = 0; i < kv_size(shifts); i++) { 1854 const StringShift cur_shift = kv_A(shifts, i); 1855 if (cur_shift.start > next_col) { 1856 viml_parser_highlight(pstate, recol_pos(token.start, next_col), 1857 cur_shift.start - next_col, 1858 body_str); 1859 } 1860 viml_parser_highlight(pstate, recol_pos(token.start, cur_shift.start), 1861 cur_shift.orig_len, 1862 (cur_shift.escape_not_known 1863 ? ukn_esc_str 1864 : esc_str)); 1865 next_col = cur_shift.start + cur_shift.orig_len; 1866 } 1867 if (next_col - token.start.col < token.len - token.data.str.closed) { 1868 viml_parser_highlight(pstate, recol_pos(token.start, next_col), 1869 (token.start.col 1870 + token.len 1871 - token.data.str.closed 1872 - next_col), 1873 body_str); 1874 } 1875 } 1876 if (token.data.str.closed) { 1877 if (is_double) { 1878 viml_parser_highlight(pstate, shifted_pos(token.start, token.len - 1), 1879 1, HL(DoubleQuote)); 1880 } else { 1881 viml_parser_highlight(pstate, shifted_pos(token.start, token.len - 1), 1882 1, HL(SingleQuote)); 1883 } 1884 } 1885 kvi_destroy(shifts); 1886 } 1887 1888 /// Additional flags to pass to lexer depending on want_node 1889 static const int want_node_to_lexer_flags[] = { 1890 [kENodeValue] = kELFlagIsNotCmp, 1891 [kENodeOperator] = kELFlagForbidScope, 1892 }; 1893 1894 /// Number of characters to highlight as NumberPrefix depending on the base 1895 static const uint8_t base_to_prefix_length[] = { 1896 [2] = 2, 1897 [8] = 1, 1898 
[10] = 0, 1899 [16] = 2, 1900 }; 1901 1902 /// Parse one Vimscript expression 1903 /// 1904 /// @param pstate Parser state. 1905 /// @param[in] flags Additional flags, see ExprParserFlags 1906 /// 1907 /// @return Parsed AST. 1908 ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags) 1909 FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL 1910 { 1911 ExprAST ast = { 1912 .err = { 1913 .msg = NULL, 1914 .arg_len = 0, 1915 .arg = NULL, 1916 }, 1917 .root = NULL, 1918 }; 1919 // Expression stack contains current branch in AST tree: that is 1920 // - Stack item 0 contains root of the tree, i.e. &ast->root. 1921 // - Stack item i points to the previous stack items’ last child. 1922 // 1923 // When parser expects “value” node that is something like identifier or "[" 1924 // (list start) last stack item contains NULL. Otherwise last stack item is 1925 // supposed to contain last “finished” value: e.g. "1" or "+(1, 1)" (node 1926 // representing "1+1"). 1927 ExprASTStack ast_stack; 1928 kvi_init(ast_stack); 1929 kvi_push(ast_stack, &ast.root); 1930 ExprASTWantedNode want_node = kENodeValue; 1931 ExprASTParseTypeStack pt_stack; 1932 kvi_init(pt_stack); 1933 kvi_push(pt_stack, kEPTExpr); 1934 if (flags & kExprFlagsParseLet) { 1935 kvi_push(pt_stack, kEPTAssignment); 1936 } 1937 LexExprToken prev_token = { .type = kExprLexMissing }; 1938 bool highlighted_prev_spacing = false; 1939 // Lambda node, valid when parsing lambda arguments only. 1940 ExprASTNode *lambda_node = NULL; 1941 size_t asgn_level = 0; 1942 do { 1943 const bool is_concat_or_subscript = ( 1944 want_node == kENodeValue 1945 && kv_size(ast_stack) > 1 1946 && (*kv_Z(ast_stack, 1947 1))->type == kExprNodeConcatOrSubscript); 1948 const int lexer_additional_flags = ( 1949 kELFlagPeek 1950 | ((flags & kExprFlagsDisallowEOC) ? 
kELFlagForbidEOC : 0) 1951 | ((want_node == kENodeValue 1952 && (kv_size(ast_stack) == 1 1953 || ((*kv_Z(ast_stack, 1))->type != kExprNodeConcat 1954 && ((*kv_Z(ast_stack, 1))->type 1955 != kExprNodeConcatOrSubscript)))) 1956 ? kELFlagAllowFloat 1957 : 0)); 1958 LexExprToken cur_token = viml_pexpr_next_token(pstate, 1959 want_node_to_lexer_flags[want_node] | 1960 lexer_additional_flags); 1961 if (cur_token.type == kExprLexEOC) { 1962 break; 1963 } 1964 LexExprTokenType tok_type = cur_token.type; 1965 const bool token_invalid = (tok_type == kExprLexInvalid); 1966 bool is_invalid = token_invalid; 1967 viml_pexpr_parse_process_token: 1968 // May use different flags this time. 1969 cur_token = viml_pexpr_next_token(pstate, 1970 want_node_to_lexer_flags[want_node] | lexer_additional_flags); 1971 if (tok_type == kExprLexSpacing) { 1972 if (is_invalid) { 1973 HL_CUR_TOKEN(Spacing); 1974 } else { 1975 // Do not do anything: let regular spacing be highlighted as normal. 1976 // This also allows later to highlight spacing as invalid. 1977 } 1978 goto viml_pexpr_parse_cycle_end; 1979 } else if (is_invalid && prev_token.type == kExprLexSpacing 1980 && !highlighted_prev_spacing) { 1981 viml_parser_highlight(pstate, prev_token.start, prev_token.len, 1982 HL(Spacing)); 1983 is_invalid = false; 1984 highlighted_prev_spacing = true; 1985 } 1986 const ParserLine pline = pstate->reader.lines.items[cur_token.start.line]; 1987 ExprASTNode **const top_node_p = kv_last(ast_stack); 1988 assert(kv_size(ast_stack) >= 1); 1989 ExprASTNode *cur_node = NULL; 1990 #ifndef NDEBUG 1991 const bool want_value = (want_node == kENodeValue); 1992 assert(want_value == (*top_node_p == NULL)); 1993 assert(kv_A(ast_stack, 0) == &ast.root); 1994 // Check that stack item i + 1 points to stack items’ i *last* child. 
1995 for (size_t i = 0; i + 1 < kv_size(ast_stack); i++) { 1996 const bool item_null = (want_value && i + 2 == kv_size(ast_stack)); 1997 assert((&(*kv_A(ast_stack, i))->children == kv_A(ast_stack, i + 1) 1998 && (item_null 1999 ? (*kv_A(ast_stack, i))->children == NULL 2000 : (*kv_A(ast_stack, i))->children->next == NULL)) 2001 || ((&(*kv_A(ast_stack, i))->children->next 2002 == kv_A(ast_stack, i + 1)) 2003 && (item_null 2004 ? (*kv_A(ast_stack, i))->children->next == NULL 2005 : (*kv_A(ast_stack, i))->children->next->next == NULL))); 2006 } 2007 #endif 2008 // Note: in Vim whether expression "cond?d.a:2" is valid depends both on 2009 // "cond" and whether "d" is a dictionary: expression is valid if condition 2010 // is true and "d" is a dictionary (with "a" key or it will complain about 2011 // missing one, but this is not relevant); if any of the requirements is 2012 // broken then this thing is parsed as "d . a:2" yielding missing colon 2013 // error. This parser does not allow such ambiguity, especially because it 2014 // simply can’t: whether "d" is a dictionary is not known at the parsing 2015 // time. 2016 // 2017 // Here example will always contain a concat with "a:2" sucking colon, 2018 // making expression invalid both because there is no longer a spare colon 2019 // for ternary and because concatenating dictionary with anything is not 2020 // valid. There are more cases when this will make a difference though. 2021 const bool node_is_key = ( 2022 is_concat_or_subscript 2023 && (cur_token.type == kExprLexPlainIdentifier 2024 ? (!cur_token.data.var.autoload 2025 && cur_token.data.var.scope == kExprVarScopeMissing) 2026 : (cur_token.type == kExprLexNumber)) 2027 && prev_token.type != kExprLexSpacing); 2028 if (is_concat_or_subscript && !node_is_key) { 2029 // Note: in Vim "d. 
a" (this is the reason behind `prev_token.type != 2030 // kExprLexSpacing` part of the condition) as well as any other "d.{expr}" 2031 // where "{expr}" does not look like a key is invalid whenever "d" happens 2032 // to be a dictionary. Since parser has no idea whether preceding 2033 // expression is actually a dictionary it can’t outright reject anything, 2034 // so it turns kExprNodeConcatOrSubscript into kExprNodeConcat instead, 2035 // which will yield different errors then Vim does in a number of 2036 // circumstances, and in any case runtime and not parse time errors. 2037 (*kv_Z(ast_stack, 1))->type = kExprNodeConcat; 2038 } 2039 // Pop some stack pt_stack items in case of misplaced nodes. 2040 const bool is_single_assignment = kv_last(pt_stack) == kEPTSingleAssignment; 2041 switch (kv_last(pt_stack)) { 2042 case kEPTExpr: 2043 break; 2044 case kEPTLambdaArguments: 2045 if ((want_node == kENodeOperator 2046 && tok_type != kExprLexComma 2047 && tok_type != kExprLexArrow) 2048 || (want_node == kENodeValue 2049 && !(cur_token.type == kExprLexPlainIdentifier 2050 && cur_token.data.var.scope == kExprVarScopeMissing 2051 && !cur_token.data.var.autoload) 2052 && tok_type != kExprLexArrow)) { 2053 lambda_node->data.fig.type_guesses.allow_lambda = false; 2054 if (lambda_node->children != NULL 2055 && lambda_node->children->type == kExprNodeComma) { 2056 // If lambda has comma child this means that parser has already seen 2057 // at least "{arg1,", so node cannot possibly be anything, but 2058 // lambda. 2059 2060 // Vim may give E121 or E720 in this case, but it does not look 2061 // right to have either because both are results of reevaluation 2062 // possibly-lambda node as a dictionary and here this is not going 2063 // to happen. 2064 ERROR_FROM_TOKEN_AND_MSG(cur_token, 2065 _("E15: Expected lambda arguments list or arrow: %.*s")); 2066 } else { 2067 // Else it may appear that possibly-lambda node is actually 2068 // a dictionary or curly-braces-name identifier. 
2069 lambda_node = NULL; 2070 kv_drop(pt_stack, 1); 2071 } 2072 } 2073 break; 2074 case kEPTSingleAssignment: 2075 case kEPTAssignment: 2076 if (want_node == kENodeValue 2077 && tok_type != kExprLexBracket 2078 && tok_type != kExprLexPlainIdentifier 2079 && (tok_type != kExprLexFigureBrace || cur_token.data.brc.closing) 2080 && !(node_is_key && tok_type == kExprLexNumber) 2081 && tok_type != kExprLexEnv 2082 && tok_type != kExprLexOption 2083 && tok_type != kExprLexRegister) { 2084 ERROR_FROM_TOKEN_AND_MSG(cur_token, 2085 _("E15: Expected value part of assignment lvalue: %.*s")); 2086 kv_drop(pt_stack, 1); 2087 } else if (want_node == kENodeOperator 2088 && tok_type != kExprLexBracket 2089 && (tok_type != kExprLexFigureBrace 2090 || cur_token.data.brc.closing) 2091 && tok_type != kExprLexDot 2092 && (tok_type != kExprLexComma || !is_single_assignment) 2093 && tok_type != kExprLexAssignment 2094 // Curly brace identifiers: will contain plain identifier or 2095 // another curly brace in position where operator is wanted. 2096 && !((tok_type == kExprLexPlainIdentifier 2097 || (tok_type == kExprLexFigureBrace 2098 && !cur_token.data.brc.closing)) 2099 && prev_token.type != kExprLexSpacing)) { 2100 if (flags & kExprFlagsMulti && MAY_HAVE_NEXT_EXPR) { 2101 goto viml_pexpr_parse_end; 2102 } 2103 ERROR_FROM_TOKEN_AND_MSG(cur_token, 2104 _("E15: Expected assignment operator or subscript: %.*s")); 2105 kv_drop(pt_stack, 1); 2106 } 2107 assert(kv_size(pt_stack)); 2108 break; 2109 } 2110 assert(kv_size(pt_stack)); 2111 const ExprASTParseType cur_pt = kv_last(pt_stack); 2112 assert(lambda_node == NULL || cur_pt == kEPTLambdaArguments); 2113 #define SIMPLE_UB_OP(op) \ 2114 case kExprLex##op: { \ 2115 if (want_node == kENodeValue) { \ 2116 /* Value level: assume unary operator. 
*/ \ 2117 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnary##op); \ 2118 *top_node_p = cur_node; \ 2119 kvi_push(ast_stack, &cur_node->children); \ 2120 HL_CUR_TOKEN(Unary##op); \ 2121 } else { \ 2122 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeBinary##op); \ 2123 ADD_OP_NODE(cur_node); \ 2124 HL_CUR_TOKEN(Binary##op); \ 2125 } \ 2126 want_node = kENodeValue; \ 2127 break; \ 2128 } 2129 switch (tok_type) { 2130 case kExprLexMissing: 2131 case kExprLexSpacing: 2132 case kExprLexEOC: 2133 abort(); 2134 case kExprLexInvalid: 2135 ERROR_FROM_TOKEN(cur_token); 2136 tok_type = cur_token.data.err.type; 2137 goto viml_pexpr_parse_process_token; 2138 case kExprLexRegister: { 2139 if (want_node == kENodeOperator) { 2140 // Register in operator position: e.g. @a @a 2141 OP_MISSING; 2142 } 2143 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeRegister); 2144 cur_node->data.reg.name = cur_token.data.reg.name; 2145 *top_node_p = cur_node; 2146 want_node = kENodeOperator; 2147 HL_CUR_TOKEN(Register); 2148 break; 2149 } 2150 SIMPLE_UB_OP(Plus) 2151 SIMPLE_UB_OP(Minus) 2152 #undef SIMPLE_UB_OP 2153 #define SIMPLE_B_OP(op, msg) \ 2154 case kExprLex##op: { \ 2155 ADD_VALUE_IF_MISSING(_("E15: Unexpected " msg ": %.*s")); \ 2156 NEW_NODE_WITH_CUR_POS(cur_node, kExprNode##op); \ 2157 HL_CUR_TOKEN(op); \ 2158 ADD_OP_NODE(cur_node); \ 2159 break; \ 2160 } 2161 SIMPLE_B_OP(Or, "or operator") 2162 SIMPLE_B_OP(And, "and operator") 2163 #undef SIMPLE_B_OP 2164 case kExprLexMultiplication: 2165 ADD_VALUE_IF_MISSING(_("E15: Unexpected multiplication-like operator: %.*s")); 2166 switch (cur_token.data.mul.type) { 2167 #define MUL_OP(lex_op_tail, node_op_tail) \ 2168 case kExprLexMul##lex_op_tail: { \ 2169 NEW_NODE_WITH_CUR_POS(cur_node, kExprNode##node_op_tail); \ 2170 HL_CUR_TOKEN(node_op_tail); \ 2171 break; \ 2172 } 2173 MUL_OP(Mul, Multiplication) 2174 MUL_OP(Div, Division) 2175 MUL_OP(Mod, Mod) 2176 #undef MUL_OP 2177 } 2178 ADD_OP_NODE(cur_node); 2179 break; 2180 case kExprLexOption: { 2181 if 
(want_node == kENodeOperator) { 2182 OP_MISSING; 2183 } 2184 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeOption); 2185 if (cur_token.type == kExprLexInvalid) { 2186 assert(cur_token.len == 1 2187 || (cur_token.len == 3 2188 && pline.data[cur_token.start.col + 2] == ':')); 2189 cur_node->data.opt.ident = ( 2190 pline.data + cur_token.start.col + cur_token.len); 2191 cur_node->data.opt.ident_len = 0; 2192 cur_node->data.opt.scope = ( 2193 cur_token.len == 3 2194 ? (ExprOptScope)pline.data[cur_token.start.col + 1] 2195 : kExprOptScopeUnspecified); 2196 } else { 2197 cur_node->data.opt.ident = cur_token.data.opt.name; 2198 cur_node->data.opt.ident_len = cur_token.data.opt.len; 2199 cur_node->data.opt.scope = cur_token.data.opt.scope; 2200 } 2201 *top_node_p = cur_node; 2202 want_node = kENodeOperator; 2203 viml_parser_highlight(pstate, cur_token.start, 1, HL(OptionSigil)); 2204 const size_t scope_shift = ( 2205 cur_token.data.opt.scope == kExprOptScopeUnspecified ? 0 : 2); 2206 if (scope_shift) { 2207 viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 1, 2208 HL(OptionScope)); 2209 viml_parser_highlight(pstate, shifted_pos(cur_token.start, 2), 1, 2210 HL(OptionScopeDelimiter)); 2211 } 2212 viml_parser_highlight(pstate, shifted_pos(cur_token.start, scope_shift + 1), 2213 cur_token.len - (scope_shift + 1), HL(OptionName)); 2214 break; 2215 } 2216 case kExprLexEnv: 2217 if (want_node == kENodeOperator) { 2218 OP_MISSING; 2219 } 2220 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeEnvironment); 2221 cur_node->data.env.ident = pline.data + cur_token.start.col + 1; 2222 cur_node->data.env.ident_len = cur_token.len - 1; 2223 if (cur_node->data.env.ident_len == 0) { 2224 ERROR_FROM_TOKEN_AND_MSG(cur_token, 2225 _("E15: Environment variable name missing")); 2226 } 2227 *top_node_p = cur_node; 2228 want_node = kENodeOperator; 2229 viml_parser_highlight(pstate, cur_token.start, 1, HL(EnvironmentSigil)); 2230 viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 2231 
cur_token.len - 1, HL(EnvironmentName)); 2232 break; 2233 case kExprLexNot: 2234 if (want_node == kENodeOperator) { 2235 OP_MISSING; 2236 } 2237 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeNot); 2238 *top_node_p = cur_node; 2239 kvi_push(ast_stack, &cur_node->children); 2240 HL_CUR_TOKEN(Not); 2241 break; 2242 case kExprLexComparison: 2243 ADD_VALUE_IF_MISSING(_("E15: Expected value, got comparison operator: %.*s")); 2244 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComparison); 2245 if (cur_token.type == kExprLexInvalid) { 2246 cur_node->data.cmp.ccs = kCCStrategyUseOption; 2247 cur_node->data.cmp.type = kExprCmpEqual; 2248 cur_node->data.cmp.inv = false; 2249 } else { 2250 cur_node->data.cmp.ccs = cur_token.data.cmp.ccs; 2251 cur_node->data.cmp.type = cur_token.data.cmp.type; 2252 cur_node->data.cmp.inv = cur_token.data.cmp.inv; 2253 } 2254 ADD_OP_NODE(cur_node); 2255 if (cur_token.data.cmp.ccs != kCCStrategyUseOption) { 2256 viml_parser_highlight(pstate, cur_token.start, cur_token.len - 1, 2257 HL(Comparison)); 2258 viml_parser_highlight(pstate, shifted_pos(cur_token.start, cur_token.len - 1), 1, 2259 HL(ComparisonModifier)); 2260 } else { 2261 HL_CUR_TOKEN(Comparison); 2262 } 2263 want_node = kENodeValue; 2264 break; 2265 case kExprLexComma: 2266 assert(!(want_node == kENodeValue && cur_pt == kEPTLambdaArguments)); 2267 if (want_node == kENodeValue) { 2268 // Value level: comma appearing here is not valid. 2269 // Note: in Vim string(,x) will give E116, this is not the case here. 
2270 ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Expected value, got comma: %.*s")); 2271 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeMissing); 2272 cur_node->len = 0; 2273 *top_node_p = cur_node; 2274 want_node = kENodeOperator; 2275 } 2276 if (cur_pt == kEPTLambdaArguments) { 2277 assert(lambda_node != NULL); 2278 assert(lambda_node->data.fig.type_guesses.allow_lambda); 2279 SELECT_FIGURE_BRACE_TYPE(lambda_node, Lambda, Lambda); 2280 } 2281 if (kv_size(ast_stack) < 2) { 2282 goto viml_pexpr_parse_invalid_comma; 2283 } 2284 for (size_t i = 1; i < kv_size(ast_stack); i++) { 2285 ExprASTNode *const *const eastnode_p = 2286 (ExprASTNode *const *)kv_Z(ast_stack, i); 2287 const ExprASTNodeType eastnode_type = (*eastnode_p)->type; 2288 const ExprOpLvl eastnode_lvl = node_lvl(**eastnode_p); 2289 if (eastnode_type == kExprNodeLambda) { 2290 assert(cur_pt == kEPTLambdaArguments 2291 && want_node == kENodeOperator); 2292 break; 2293 } else if (eastnode_type == kExprNodeDictLiteral 2294 || eastnode_type == kExprNodeListLiteral 2295 || eastnode_type == kExprNodeCall) { 2296 break; 2297 } else if (eastnode_type == kExprNodeComma 2298 || eastnode_type == kExprNodeColon 2299 || eastnode_lvl > kEOpLvlComma) { 2300 // Do nothing 2301 } else { 2302 viml_pexpr_parse_invalid_comma: 2303 ERROR_FROM_TOKEN_AND_MSG(cur_token, 2304 _("E15: Comma outside of call, lambda or literal: %.*s")); 2305 break; 2306 } 2307 if (i == kv_size(ast_stack) - 1) { 2308 goto viml_pexpr_parse_invalid_comma; 2309 } 2310 } 2311 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComma); 2312 ADD_OP_NODE(cur_node); 2313 HL_CUR_TOKEN(Comma); 2314 break; 2315 #define EXP_VAL_COLON "E15: Expected value, got colon: %.*s" 2316 case kExprLexColon: { 2317 bool is_ternary = false; 2318 if (kv_size(ast_stack) < 2) { 2319 goto viml_pexpr_parse_invalid_colon; 2320 } 2321 bool can_be_ternary = true; 2322 bool is_subscript = false; 2323 for (size_t i = 1; i < kv_size(ast_stack); i++) { 2324 ExprASTNode *const *const eastnode_p = 2325 
(ExprASTNode *const *)kv_Z(ast_stack, i); 2326 const ExprASTNodeType eastnode_type = (*eastnode_p)->type; 2327 const ExprOpLvl eastnode_lvl = node_lvl(**eastnode_p); 2328 STATIC_ASSERT(kEOpLvlTernary > kEOpLvlComma, 2329 "Unexpected operator priorities"); 2330 if (can_be_ternary && eastnode_type == kExprNodeTernaryValue 2331 && !(*eastnode_p)->data.ter.got_colon) { 2332 kv_drop(ast_stack, i); 2333 (*eastnode_p)->start = cur_token.start; 2334 (*eastnode_p)->len = cur_token.len; 2335 if (prev_token.type == kExprLexSpacing) { 2336 (*eastnode_p)->start = prev_token.start; 2337 (*eastnode_p)->len += prev_token.len; 2338 } 2339 is_ternary = true; 2340 (*eastnode_p)->data.ter.got_colon = true; 2341 ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON)); 2342 assert((*eastnode_p)->children != NULL); 2343 assert((*eastnode_p)->children->next == NULL); 2344 kvi_push(ast_stack, &(*eastnode_p)->children->next); 2345 break; 2346 } else if (eastnode_type == kExprNodeUnknownFigure) { 2347 SELECT_FIGURE_BRACE_TYPE(*eastnode_p, DictLiteral, Dict); 2348 break; 2349 } else if (eastnode_type == kExprNodeDictLiteral) { 2350 break; 2351 } else if (eastnode_type == kExprNodeSubscript) { 2352 is_subscript = true; 2353 // can_be_ternary = false; 2354 assert(!is_ternary); 2355 break; 2356 } else if (eastnode_type == kExprNodeColon) { 2357 goto viml_pexpr_parse_invalid_colon; 2358 } else if (eastnode_lvl >= kEOpLvlTernaryValue) { 2359 // Do nothing 2360 } else if (eastnode_lvl >= kEOpLvlComma) { 2361 can_be_ternary = false; 2362 } else { 2363 goto viml_pexpr_parse_invalid_colon; 2364 } 2365 if (i == kv_size(ast_stack) - 1) { 2366 goto viml_pexpr_parse_invalid_colon; 2367 } 2368 } 2369 if (is_subscript) { 2370 assert(kv_size(ast_stack) > 1); 2371 // Colon immediately following subscript start: it is empty subscript 2372 // part like a[:2]. 
2373 if (want_node == kENodeValue 2374 && (*kv_Z(ast_stack, 1))->type == kExprNodeSubscript) { 2375 NEW_NODE_WITH_CUR_POS(*top_node_p, kExprNodeMissing); 2376 (*top_node_p)->len = 0; 2377 want_node = kENodeOperator; 2378 } else { 2379 ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON)); 2380 } 2381 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon); 2382 ADD_OP_NODE(cur_node); 2383 HL_CUR_TOKEN(SubscriptColon); 2384 } else { 2385 goto viml_pexpr_parse_valid_colon; 2386 viml_pexpr_parse_invalid_colon: 2387 ERROR_FROM_TOKEN_AND_MSG(cur_token, 2388 _("E15: Colon outside of dictionary or ternary operator: %.*s")); 2389 viml_pexpr_parse_valid_colon: 2390 ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON)); 2391 if (is_ternary) { 2392 HL_CUR_TOKEN(TernaryColon); 2393 } else { 2394 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon); 2395 ADD_OP_NODE(cur_node); 2396 HL_CUR_TOKEN(Colon); 2397 } 2398 } 2399 want_node = kENodeValue; 2400 break; 2401 } 2402 #undef EXP_VAL_COLON 2403 case kExprLexBracket: 2404 if (cur_token.data.brc.closing) { 2405 ExprASTNode **new_top_node_p = NULL; 2406 // Always drop the topmost value: 2407 // 2408 // 1. When want_node != kENodeValue topmost item on stack is 2409 // a *finished* left operand, which may as well be "[@a]" which 2410 // needs not be finished again. 2411 // 2. Otherwise it is pointing to NULL what nobody wants. 2412 kv_drop(ast_stack, 1); 2413 if (!kv_size(ast_stack)) { 2414 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeListLiteral); 2415 cur_node->len = 0; 2416 if (want_node != kENodeValue) { 2417 cur_node->children = *top_node_p; 2418 } 2419 *top_node_p = cur_node; 2420 new_top_node_p = top_node_p; 2421 goto viml_pexpr_parse_bracket_closing_error; 2422 } 2423 if (want_node == kENodeValue) { 2424 // It is OK to want value if 2425 // 2426 // 1. It is empty list literal, in which case top node will be 2427 // ListLiteral. 2428 // 2. It is list literal with trailing comma, in which case top node 2429 // will be that comma. 2430 // 3. 
It is subscript with colon, but without one of the values: 2431 // e.g. "a[:]", "a[1:]", top node will be colon in this case. 2432 if ((*kv_last(ast_stack))->type != kExprNodeListLiteral 2433 && (*kv_last(ast_stack))->type != kExprNodeComma 2434 && (*kv_last(ast_stack))->type != kExprNodeColon) { 2435 ERROR_FROM_TOKEN_AND_MSG(cur_token, 2436 _("E15: Expected value, got closing bracket: %.*s")); 2437 } 2438 } 2439 do { 2440 new_top_node_p = kv_pop(ast_stack); 2441 } while (kv_size(ast_stack) 2442 && (new_top_node_p == NULL 2443 || ((*new_top_node_p)->type != kExprNodeListLiteral 2444 && (*new_top_node_p)->type != kExprNodeSubscript))); 2445 ExprASTNode *new_top_node = *new_top_node_p; 2446 switch (new_top_node->type) { 2447 case kExprNodeListLiteral: 2448 if (pt_is_assignment(cur_pt) && new_top_node->children == NULL) { 2449 ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E475: Unable to assign to empty list: %.*s")); 2450 } 2451 HL_CUR_TOKEN(List); 2452 break; 2453 case kExprNodeSubscript: 2454 HL_CUR_TOKEN(SubscriptBracket); 2455 break; 2456 default: 2457 viml_pexpr_parse_bracket_closing_error: 2458 assert(!kv_size(ast_stack)); 2459 ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Unexpected closing figure brace: %.*s")); 2460 HL_CUR_TOKEN(List); 2461 break; 2462 } 2463 kvi_push(ast_stack, new_top_node_p); 2464 want_node = kENodeOperator; 2465 if (kv_size(ast_stack) <= asgn_level) { 2466 assert(kv_size(ast_stack) == asgn_level); 2467 asgn_level = 0; 2468 if (cur_pt == kEPTAssignment) { 2469 assert(ast.err.msg); 2470 } else if (cur_pt == kEPTExpr 2471 && kv_size(pt_stack) > 1 2472 && pt_is_assignment(kv_Z(pt_stack, 1))) { 2473 kv_drop(pt_stack, 1); 2474 } 2475 } 2476 if (cur_pt == kEPTSingleAssignment && kv_size(ast_stack) == 1) { 2477 kv_drop(pt_stack, 1); 2478 } 2479 } else { 2480 if (want_node == kENodeValue) { 2481 // Value means list literal or list assignment. 
2482 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeListLiteral); 2483 *top_node_p = cur_node; 2484 kvi_push(ast_stack, &cur_node->children); 2485 if (cur_pt == kEPTAssignment) { 2486 // Additional assignment parse type allows to easily forbid nested 2487 // lists. 2488 kvi_push(pt_stack, kEPTSingleAssignment); 2489 } else if (cur_pt == kEPTSingleAssignment) { 2490 ERROR_FROM_TOKEN_AND_MSG(cur_token, 2491 _("E475: Nested lists not allowed when assigning: %.*s")); 2492 } 2493 HL_CUR_TOKEN(List); 2494 } else { 2495 // Operator means subscript, also in assignment. But in assignment 2496 // subscript may be pretty much any expression, so need to push 2497 // kEPTExpr. 2498 if (prev_token.type == kExprLexSpacing) { 2499 OP_MISSING; 2500 } 2501 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeSubscript); 2502 ADD_OP_NODE(cur_node); 2503 HL_CUR_TOKEN(SubscriptBracket); 2504 if (pt_is_assignment(cur_pt)) { 2505 assert(want_node == kENodeValue); // Subtract 1 for NULL at top. 2506 asgn_level = kv_size(ast_stack) - 1; 2507 kvi_push(pt_stack, kEPTExpr); 2508 } 2509 } 2510 } 2511 break; 2512 case kExprLexFigureBrace: 2513 if (cur_token.data.brc.closing) { 2514 ExprASTNode **new_top_node_p = NULL; 2515 // Always drop the topmost value: 2516 // 2517 // 1. When want_node != kENodeValue topmost item on stack is 2518 // a *finished* left operand, which may as well be "{@a}" which 2519 // needs not be finished again. 2520 // 2. Otherwise it is pointing to NULL what nobody wants. 
2521 kv_drop(ast_stack, 1); 2522 if (!kv_size(ast_stack)) { 2523 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnknownFigure); 2524 cur_node->data.fig.type_guesses.allow_lambda = false; 2525 cur_node->data.fig.type_guesses.allow_dict = false; 2526 cur_node->data.fig.type_guesses.allow_ident = false; 2527 cur_node->len = 0; 2528 if (want_node != kENodeValue) { 2529 cur_node->children = *top_node_p; 2530 } 2531 *top_node_p = cur_node; 2532 new_top_node_p = top_node_p; 2533 goto viml_pexpr_parse_figure_brace_closing_error; 2534 } 2535 if (want_node == kENodeValue) { 2536 if ((*kv_last(ast_stack))->type != kExprNodeUnknownFigure 2537 && (*kv_last(ast_stack))->type != kExprNodeComma) { 2538 // kv_last being UnknownFigure may occur for empty dictionary 2539 // literal, while Comma is expected in case of non-empty one. 2540 ERROR_FROM_TOKEN_AND_MSG(cur_token, 2541 _("E15: Expected value, got closing figure brace: %.*s")); 2542 } 2543 } 2544 do { 2545 new_top_node_p = kv_pop(ast_stack); 2546 } while (kv_size(ast_stack) 2547 && (new_top_node_p == NULL 2548 || ((*new_top_node_p)->type != kExprNodeUnknownFigure 2549 && (*new_top_node_p)->type != kExprNodeDictLiteral 2550 && ((*new_top_node_p)->type 2551 != kExprNodeCurlyBracesIdentifier) 2552 && (*new_top_node_p)->type != kExprNodeLambda))); 2553 ExprASTNode *new_top_node = *new_top_node_p; 2554 switch (new_top_node->type) { 2555 case kExprNodeUnknownFigure: 2556 if (new_top_node->children == NULL) { 2557 // No children of curly braces node indicates empty dictionary. 
2558 assert(want_node == kENodeValue); 2559 assert(new_top_node->data.fig.type_guesses.allow_dict); 2560 SELECT_FIGURE_BRACE_TYPE(new_top_node, DictLiteral, Dict); 2561 HL_CUR_TOKEN(Dict); 2562 } else if (new_top_node->data.fig.type_guesses.allow_ident) { 2563 SELECT_FIGURE_BRACE_TYPE(new_top_node, CurlyBracesIdentifier, 2564 Curly); 2565 HL_CUR_TOKEN(Curly); 2566 } else { 2567 // If by this time type of the node has not already been 2568 // guessed, but it definitely is not a curly braces name then 2569 // it is invalid for sure. 2570 ERROR_FROM_NODE_AND_MSG(new_top_node, 2571 _("E15: Don't know what figure brace means: %.*s")); 2572 if (pstate->colors) { 2573 // Will reset to NvimInvalidFigureBrace. 2574 kv_A(*pstate->colors, 2575 new_top_node->data.fig.opening_hl_idx).group = ( 2576 HL(FigureBrace)); 2577 } 2578 HL_CUR_TOKEN(FigureBrace); 2579 } 2580 break; 2581 case kExprNodeDictLiteral: 2582 HL_CUR_TOKEN(Dict); 2583 break; 2584 case kExprNodeCurlyBracesIdentifier: 2585 HL_CUR_TOKEN(Curly); 2586 break; 2587 case kExprNodeLambda: 2588 HL_CUR_TOKEN(Lambda); 2589 break; 2590 default: 2591 viml_pexpr_parse_figure_brace_closing_error: 2592 assert(!kv_size(ast_stack)); 2593 ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Unexpected closing figure brace: %.*s")); 2594 HL_CUR_TOKEN(FigureBrace); 2595 break; 2596 } 2597 kvi_push(ast_stack, new_top_node_p); 2598 want_node = kENodeOperator; 2599 if (kv_size(ast_stack) <= asgn_level) { 2600 assert(kv_size(ast_stack) == asgn_level); 2601 if (cur_pt == kEPTExpr 2602 && kv_size(pt_stack) > 1 2603 && pt_is_assignment(kv_Z(pt_stack, 1))) { 2604 kv_drop(pt_stack, 1); 2605 asgn_level = 0; 2606 } 2607 } 2608 } else { 2609 if (want_node == kENodeValue) { 2610 HL_CUR_TOKEN(FigureBrace); 2611 // Value: may be any of lambda, dictionary literal and curly braces 2612 // name. 2613 2614 // Though if we are in an assignment this may only be a curly braces 2615 // name. 
2616 if (pt_is_assignment(cur_pt)) { 2617 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeCurlyBracesIdentifier); 2618 cur_node->data.fig.type_guesses.allow_lambda = false; 2619 cur_node->data.fig.type_guesses.allow_dict = false; 2620 cur_node->data.fig.type_guesses.allow_ident = true; 2621 kvi_push(pt_stack, kEPTExpr); 2622 } else { 2623 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnknownFigure); 2624 cur_node->data.fig.type_guesses.allow_lambda = true; 2625 cur_node->data.fig.type_guesses.allow_dict = true; 2626 cur_node->data.fig.type_guesses.allow_ident = true; 2627 } 2628 if (pstate->colors) { 2629 cur_node->data.fig.opening_hl_idx = kv_size(*pstate->colors) - 1; 2630 } 2631 *top_node_p = cur_node; 2632 kvi_push(ast_stack, &cur_node->children); 2633 kvi_push(pt_stack, kEPTLambdaArguments); 2634 lambda_node = cur_node; 2635 } else { 2636 // uncrustify:off 2637 ADD_IDENT(do { 2638 NEW_NODE_WITH_CUR_POS(cur_node, 2639 kExprNodeCurlyBracesIdentifier); 2640 if (pstate->colors) { 2641 cur_node->data.fig.opening_hl_idx = kv_size(*pstate->colors); 2642 } 2643 cur_node->data.fig.type_guesses.allow_lambda = false; 2644 cur_node->data.fig.type_guesses.allow_dict = false; 2645 cur_node->data.fig.type_guesses.allow_ident = true; 2646 kvi_push(ast_stack, &cur_node->children); 2647 if (pt_is_assignment(cur_pt)) { 2648 kvi_push(pt_stack, kEPTExpr); 2649 } 2650 want_node = kENodeValue; 2651 } while (0), 2652 Curly); 2653 // uncrustify:on 2654 } 2655 if (pt_is_assignment(cur_pt) 2656 && !pt_is_assignment(kv_last(pt_stack))) { 2657 assert(want_node == kENodeValue); // Subtract 1 for NULL at top. 2658 asgn_level = kv_size(ast_stack) - 1; 2659 } 2660 } 2661 break; 2662 case kExprLexArrow: 2663 if (cur_pt == kEPTLambdaArguments) { 2664 kv_drop(pt_stack, 1); 2665 assert(kv_size(pt_stack)); 2666 if (want_node == kENodeValue) { 2667 // Wanting value means trailing comma and NULL at the top of the 2668 // stack. 
2669 kv_drop(ast_stack, 1); 2670 } 2671 assert(kv_size(ast_stack) >= 1); 2672 while ((*kv_last(ast_stack))->type != kExprNodeLambda 2673 && (*kv_last(ast_stack))->type != kExprNodeUnknownFigure) { 2674 kv_drop(ast_stack, 1); 2675 } 2676 assert((*kv_last(ast_stack)) == lambda_node); 2677 SELECT_FIGURE_BRACE_TYPE(lambda_node, Lambda, Lambda); 2678 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeArrow); 2679 if (lambda_node->children == NULL) { 2680 assert(want_node == kENodeValue); 2681 lambda_node->children = cur_node; 2682 kvi_push(ast_stack, &lambda_node->children); 2683 } else { 2684 assert(lambda_node->children->next == NULL); 2685 lambda_node->children->next = cur_node; 2686 kvi_push(ast_stack, &lambda_node->children->next); 2687 } 2688 kvi_push(ast_stack, &cur_node->children); 2689 lambda_node = NULL; 2690 } else { 2691 // Only first branch is valid. 2692 ADD_VALUE_IF_MISSING(_("E15: Unexpected arrow: %.*s")); 2693 ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Arrow outside of lambda: %.*s")); 2694 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeArrow); 2695 ADD_OP_NODE(cur_node); 2696 } 2697 want_node = kENodeValue; 2698 HL_CUR_TOKEN(Arrow); 2699 break; 2700 case kExprLexPlainIdentifier: { 2701 const ExprVarScope scope = (cur_token.type == kExprLexInvalid 2702 ? kExprVarScopeMissing 2703 : cur_token.data.var.scope); 2704 if (want_node == kENodeValue) { 2705 want_node = kENodeOperator; 2706 NEW_NODE_WITH_CUR_POS(cur_node, 2707 (node_is_key 2708 ? kExprNodePlainKey 2709 : kExprNodePlainIdentifier)); 2710 cur_node->data.var.scope = scope; 2711 const size_t scope_shift = (scope == kExprVarScopeMissing ? 
0 : 2); 2712 cur_node->data.var.ident = (pline.data + cur_token.start.col 2713 + scope_shift); 2714 cur_node->data.var.ident_len = cur_token.len - scope_shift; 2715 *top_node_p = cur_node; 2716 if (scope_shift) { 2717 assert(!node_is_key); 2718 viml_parser_highlight(pstate, cur_token.start, 1, 2719 HL(IdentifierScope)); 2720 viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 1, 2721 HL(IdentifierScopeDelimiter)); 2722 } 2723 viml_parser_highlight(pstate, shifted_pos(cur_token.start, 2724 scope_shift), 2725 cur_token.len - scope_shift, 2726 (node_is_key 2727 ? HL(IdentifierKey) 2728 : HL(IdentifierName))); 2729 } else { 2730 if (scope == kExprVarScopeMissing) { 2731 // uncrustify:off 2732 ADD_IDENT(do { 2733 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainIdentifier); 2734 cur_node->data.var.scope = scope; 2735 cur_node->data.var.ident = pline.data + cur_token.start.col; 2736 cur_node->data.var.ident_len = cur_token.len; 2737 want_node = kENodeOperator; 2738 } while (0), 2739 IdentifierName); 2740 // uncrustify:on 2741 } else { 2742 OP_MISSING; 2743 } 2744 } 2745 break; 2746 } 2747 case kExprLexNumber: 2748 if (want_node != kENodeValue) { 2749 OP_MISSING; 2750 } 2751 if (node_is_key) { 2752 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainKey); 2753 cur_node->data.var.ident = pline.data + cur_token.start.col; 2754 cur_node->data.var.ident_len = cur_token.len; 2755 HL_CUR_TOKEN(IdentifierKey); 2756 } else if (cur_token.data.num.is_float) { 2757 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeFloat); 2758 cur_node->data.flt.value = cur_token.data.num.val.floating; 2759 HL_CUR_TOKEN(Float); 2760 } else { 2761 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeInteger); 2762 cur_node->data.num.value = cur_token.data.num.val.integer; 2763 const uint8_t prefix_length = base_to_prefix_length[ 2764 cur_token.data.num.base]; 2765 viml_parser_highlight(pstate, cur_token.start, prefix_length, 2766 HL(NumberPrefix)); 2767 viml_parser_highlight(pstate, shifted_pos(cur_token.start, 
prefix_length), 2768 cur_token.len - prefix_length, HL(Number)); 2769 } 2770 want_node = kENodeOperator; 2771 *top_node_p = cur_node; 2772 break; 2773 case kExprLexDot: 2774 ADD_VALUE_IF_MISSING(_("E15: Unexpected dot: %.*s")); 2775 if (prev_token.type == kExprLexSpacing) { 2776 if (cur_pt == kEPTAssignment) { 2777 ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Cannot concatenate in assignments: %.*s")); 2778 } 2779 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcat); 2780 HL_CUR_TOKEN(Concat); 2781 } else { 2782 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcatOrSubscript); 2783 HL_CUR_TOKEN(ConcatOrSubscript); 2784 } 2785 ADD_OP_NODE(cur_node); 2786 break; 2787 case kExprLexParenthesis: 2788 if (cur_token.data.brc.closing) { 2789 if (want_node == kENodeValue) { 2790 if (kv_size(ast_stack) > 1) { 2791 const ExprASTNode *const prev_top_node = *kv_Z(ast_stack, 1); 2792 if (prev_top_node->type == kExprNodeCall) { 2793 // Function call without arguments, this is not an error. 2794 // But further code does not expect NULL nodes. 2795 kv_drop(ast_stack, 1); 2796 goto viml_pexpr_parse_no_paren_closing_error; 2797 } 2798 } 2799 ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Expected value, got parenthesis: %.*s")); 2800 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeMissing); 2801 cur_node->len = 0; 2802 *top_node_p = cur_node; 2803 } else { 2804 // Always drop the topmost value: when want_node != kENodeValue 2805 // topmost item on stack is a *finished* left operand, which may as 2806 // well be "(@a)" which needs not be finished again. 
2807 kv_drop(ast_stack, 1); 2808 } 2809 viml_pexpr_parse_no_paren_closing_error: {} 2810 ExprASTNode **new_top_node_p = NULL; 2811 while (kv_size(ast_stack) 2812 && (new_top_node_p == NULL 2813 || ((*new_top_node_p)->type != kExprNodeNested 2814 && (*new_top_node_p)->type != kExprNodeCall))) { 2815 new_top_node_p = kv_pop(ast_stack); 2816 } 2817 if (new_top_node_p != NULL 2818 && ((*new_top_node_p)->type == kExprNodeNested 2819 || (*new_top_node_p)->type == kExprNodeCall)) { 2820 if ((*new_top_node_p)->type == kExprNodeNested) { 2821 HL_CUR_TOKEN(NestingParenthesis); 2822 } else { 2823 HL_CUR_TOKEN(CallingParenthesis); 2824 } 2825 } else { 2826 // “Always drop the topmost value” branch has got rid of the single 2827 // value stack had, so there is nothing known to enclose. Correct 2828 // this. 2829 if (new_top_node_p == NULL) { 2830 new_top_node_p = top_node_p; 2831 } 2832 ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Unexpected closing parenthesis: %.*s")); 2833 HL_CUR_TOKEN(NestingParenthesis); 2834 cur_node = NEW_NODE(kExprNodeNested); 2835 cur_node->start = cur_token.start; 2836 cur_node->len = 0; 2837 // Unexpected closing parenthesis, assume that it was wanted to 2838 // enclose everything in (). 2839 cur_node->children = *new_top_node_p; 2840 *new_top_node_p = cur_node; 2841 assert(cur_node->next == NULL); 2842 } 2843 kvi_push(ast_stack, new_top_node_p); 2844 want_node = kENodeOperator; 2845 } else { 2846 switch (want_node) { 2847 case kENodeValue: 2848 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeNested); 2849 *top_node_p = cur_node; 2850 kvi_push(ast_stack, &cur_node->children); 2851 HL_CUR_TOKEN(NestingParenthesis); 2852 break; 2853 case kENodeOperator: 2854 if (prev_token.type == kExprLexSpacing) { 2855 // For some reason "function (args)" is a function call, but 2856 // "(funcref) (args)" is not. 
As far as I remember this somehow involves 2857 // compatibility and Bram was commenting that this is 2858 // intentionally inconsistent and he is not very happy with the 2859 // situation himself. 2860 if ((*top_node_p)->type != kExprNodePlainIdentifier 2861 && (*top_node_p)->type != kExprNodeComplexIdentifier 2862 && (*top_node_p)->type != kExprNodeCurlyBracesIdentifier) { 2863 OP_MISSING; 2864 } 2865 } 2866 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeCall); 2867 ADD_OP_NODE(cur_node); 2868 HL_CUR_TOKEN(CallingParenthesis); 2869 break; 2870 } 2871 want_node = kENodeValue; 2872 } 2873 break; 2874 case kExprLexQuestion: { 2875 ADD_VALUE_IF_MISSING(_("E15: Expected value, got question mark: %.*s")); 2876 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeTernary); 2877 ADD_OP_NODE(cur_node); 2878 HL_CUR_TOKEN(Ternary); 2879 ExprASTNode *ter_val_node; 2880 NEW_NODE_WITH_CUR_POS(ter_val_node, kExprNodeTernaryValue); 2881 ter_val_node->data.ter.got_colon = false; 2882 assert(cur_node->children != NULL); 2883 assert(cur_node->children->next == NULL); 2884 assert(kv_last(ast_stack) == &cur_node->children->next); 2885 *kv_last(ast_stack) = ter_val_node; 2886 kvi_push(ast_stack, &ter_val_node->children); 2887 break; 2888 } 2889 case kExprLexDoubleQuotedString: 2890 case kExprLexSingleQuotedString: { 2891 const bool is_double = (tok_type == kExprLexDoubleQuotedString); 2892 if (!cur_token.data.str.closed) { 2893 // It is weird, but Vim has two identical errors messages with 2894 // different error numbers: "E114: Missing quote" and 2895 // "E115: Missing quote". 2896 ERROR_FROM_TOKEN_AND_MSG(cur_token, (is_double 2897 ? _("E114: Missing double quote: %.*s") 2898 : _("E115: Missing single quote: %.*s"))); 2899 } 2900 if (want_node == kENodeOperator) { 2901 OP_MISSING; 2902 } 2903 NEW_NODE_WITH_CUR_POS(cur_node, (is_double 2904 ? 
kExprNodeDoubleQuotedString 2905 : kExprNodeSingleQuotedString)); 2906 *top_node_p = cur_node; 2907 parse_quoted_string(pstate, cur_node, cur_token, &ast_stack, is_invalid); 2908 want_node = kENodeOperator; 2909 break; 2910 } 2911 case kExprLexAssignment: 2912 if (cur_pt == kEPTAssignment) { 2913 kv_drop(pt_stack, 1); 2914 } else if (cur_pt == kEPTSingleAssignment) { 2915 kv_drop(pt_stack, 2); 2916 ERROR_FROM_TOKEN_AND_MSG(cur_token, 2917 _("E475: Expected closing bracket to end list assignment " 2918 "lvalue: %.*s")); 2919 } else { 2920 ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Misplaced assignment: %.*s")); 2921 } 2922 assert(kv_size(pt_stack)); 2923 assert(kv_last(pt_stack) == kEPTExpr); 2924 ADD_VALUE_IF_MISSING(_("E15: Unexpected assignment: %.*s")); 2925 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeAssignment); 2926 cur_node->data.ass.type = cur_token.data.ass.type; 2927 switch (cur_token.data.ass.type) { 2928 #define HL_ASGN(asgn, hl) \ 2929 case kExprAsgn##asgn: { HL_CUR_TOKEN(hl); break; } 2930 HL_ASGN(Plain, PlainAssignment) 2931 HL_ASGN(Add, AssignmentWithAddition) 2932 HL_ASGN(Subtract, AssignmentWithSubtraction) 2933 HL_ASGN(Concat, AssignmentWithConcatenation) 2934 #undef HL_ASGN 2935 } 2936 ADD_OP_NODE(cur_node); 2937 break; 2938 } 2939 viml_pexpr_parse_cycle_end: 2940 prev_token = cur_token; 2941 highlighted_prev_spacing = false; 2942 viml_parser_advance(pstate, cur_token.len); 2943 } while (true); 2944 viml_pexpr_parse_end: 2945 assert(kv_size(pt_stack)); 2946 assert(kv_size(ast_stack)); 2947 if (want_node == kENodeValue 2948 // Blacklist some parse type entries as their presence means better error 2949 // message in the other branch. 2950 && kv_last(pt_stack) != kEPTLambdaArguments) { 2951 east_set_error(pstate, &ast.err, _("E15: Expected value, got EOC: %.*s"), 2952 pstate->pos); 2953 } else if (kv_size(ast_stack) != 1) { 2954 // Something may be wrong, check whether it really is. 
2955 2956 // Pointer to ast.root must never be dropped, so “!= 1” is expected to be 2957 // the same as “> 1”. 2958 assert(kv_size(ast_stack)); 2959 // Topmost stack item must be a *finished* value, so it must not be 2960 // analyzed. E.g. it may contain an already finished nested expression. 2961 kv_drop(ast_stack, 1); 2962 while (ast.err.msg == NULL && kv_size(ast_stack)) { 2963 const ExprASTNode *const cur_node = (*kv_pop(ast_stack)); 2964 // This should only happen when want_node == kENodeValue. 2965 assert(cur_node != NULL); 2966 // TODO(ZyX-I): Rehighlight as invalid? 2967 switch (cur_node->type) { 2968 case kExprNodeOpMissing: 2969 case kExprNodeMissing: 2970 // Error should’ve been already reported. 2971 break; 2972 case kExprNodeCall: 2973 east_set_error(pstate, &ast.err, 2974 _("E116: Missing closing parenthesis for function call: %.*s"), 2975 cur_node->start); 2976 break; 2977 case kExprNodeNested: 2978 east_set_error(pstate, &ast.err, 2979 _("E110: Missing closing parenthesis for nested expression" 2980 ": %.*s"), 2981 cur_node->start); 2982 break; 2983 case kExprNodeListLiteral: 2984 // For whatever reason "[1" yields "E696: Missing comma in list" error 2985 // in Vim while "[1," yields E697. 2986 east_set_error(pstate, &ast.err, 2987 _("E697: Missing end of List ']': %.*s"), 2988 cur_node->start); 2989 break; 2990 case kExprNodeDictLiteral: 2991 // Same problem like with list literal with E722 (missing comma) vs 2992 // E723, but additionally just "{" yields only E15. 
2993 east_set_error(pstate, &ast.err, 2994 _("E723: Missing end of Dictionary '}': %.*s"), 2995 cur_node->start); 2996 break; 2997 case kExprNodeUnknownFigure: 2998 east_set_error(pstate, &ast.err, 2999 _("E15: Missing closing figure brace: %.*s"), 3000 cur_node->start); 3001 break; 3002 case kExprNodeLambda: 3003 east_set_error(pstate, &ast.err, 3004 _("E15: Missing closing figure brace for lambda: %.*s"), 3005 cur_node->start); 3006 break; 3007 case kExprNodeCurlyBracesIdentifier: 3008 // Until trailing "}" it is impossible to distinguish curly braces 3009 // identifier and Dict, so it must not appear in the stack like this. 3010 abort(); 3011 case kExprNodeInteger: 3012 case kExprNodeFloat: 3013 case kExprNodeSingleQuotedString: 3014 case kExprNodeDoubleQuotedString: 3015 case kExprNodeOption: 3016 case kExprNodeEnvironment: 3017 case kExprNodeRegister: 3018 case kExprNodePlainIdentifier: 3019 case kExprNodePlainKey: 3020 // These are plain values and not containers, for them it should only 3021 // be possible to show up in the topmost stack element, but it was 3022 // unconditionally popped at the start. 3023 abort(); 3024 case kExprNodeComma: 3025 case kExprNodeColon: 3026 case kExprNodeArrow: 3027 // It is actually only valid inside something else, but everything 3028 // where one of the above is valid requires to be closed and thus is 3029 // to be caught later. 3030 break; 3031 case kExprNodeSubscript: 3032 case kExprNodeConcatOrSubscript: 3033 case kExprNodeComplexIdentifier: 3034 case kExprNodeAssignment: 3035 case kExprNodeMod: 3036 case kExprNodeDivision: 3037 case kExprNodeMultiplication: 3038 case kExprNodeNot: 3039 case kExprNodeAnd: 3040 case kExprNodeOr: 3041 case kExprNodeConcat: 3042 case kExprNodeComparison: 3043 case kExprNodeUnaryMinus: 3044 case kExprNodeUnaryPlus: 3045 case kExprNodeBinaryMinus: 3046 case kExprNodeTernary: 3047 case kExprNodeBinaryPlus: 3048 // It is OK to see these in the stack. 
3049 break; 3050 case kExprNodeTernaryValue: 3051 if (!cur_node->data.ter.got_colon) { 3052 // Actually Vim throws E109 in more cases. 3053 east_set_error(pstate, &ast.err, _("E109: Missing ':' after '?': %.*s"), 3054 cur_node->start); 3055 } 3056 break; 3057 } 3058 } 3059 } 3060 kvi_destroy(ast_stack); 3061 return ast; 3062 } 3063 3064 #undef NEW_NODE 3065 #undef HL