decode.c (38328B)
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "klib/kvec.h"
#include "mpack/conv.h"
#include "mpack/mpack_core.h"
#include "mpack/object.h"
#include "nvim/ascii_defs.h"
#include "nvim/charset.h"
#include "nvim/eval.h"
#include "nvim/eval/decode.h"
#include "nvim/eval/encode.h"
#include "nvim/eval/typval.h"
#include "nvim/eval/typval_defs.h"
#include "nvim/eval/vars.h"
#include "nvim/eval_defs.h"
#include "nvim/garray.h"
#include "nvim/gettext_defs.h"
#include "nvim/macros_defs.h"
#include "nvim/mbyte.h"
#include "nvim/memory.h"
#include "nvim/message.h"
#include "nvim/vim_defs.h"

/// Entry of the container stack used by the JSON decoder
///
/// One entry is pushed for every `[` or `{` that is currently open.
typedef struct {
  size_t stack_index;  ///< Index of this container in the values stack
                       ///< (size of the values stack when it was pushed).
  list_T *special_val;  ///< _VAL key contents for special maps.
                        ///< When container is not a special dictionary it is
                        ///< NULL.
  const char *s;  ///< Location where container starts (its opening bracket).
  typval_T container;  ///< Container. Either VAR_LIST or VAR_DICT. For
                       ///< a special map this is the special dictionary
                       ///< itself; its _VAL list is kept in special_val.
} ContainerStackItem;

/// Entry of the values stack used by the JSON decoder
typedef struct {
  bool is_special_string;  ///< Indicates that current value is a string that
                           ///< was not decoded to VAR_STRING (set by
                           ///< parse_json_string() from the type returned by
                           ///< decode_string()).
  bool didcomma;  ///< True if previous token was comma.
  bool didcolon;  ///< True if previous token was colon.
  typval_T val;  ///< Actual value.
} ValuesStackItem;

/// Vector containing values not yet saved in any container
typedef kvec_t(ValuesStackItem) ValuesStack;

/// Vector containing containers, each next container is located inside previous
typedef kvec_t(ContainerStackItem) ContainerStack;

#include "eval/decode.c.generated.h"

/// Create special dictionary
///
/// @param[out]  rettv  Location where created dictionary will be saved.
60 /// @param[in] type Type of the dictionary. 61 /// @param[in] val Value associated with the _VAL key. 62 static inline void create_special_dict(typval_T *const rettv, const MessagePackType type, 63 typval_T val) 64 FUNC_ATTR_NONNULL_ALL 65 { 66 dict_T *const dict = tv_dict_alloc(); 67 dictitem_T *const type_di = tv_dict_item_alloc_len(S_LEN("_TYPE")); 68 type_di->di_tv.v_type = VAR_LIST; 69 type_di->di_tv.v_lock = VAR_UNLOCKED; 70 type_di->di_tv.vval.v_list = (list_T *)eval_msgpack_type_lists[type]; 71 tv_list_ref(type_di->di_tv.vval.v_list); 72 tv_dict_add(dict, type_di); 73 dictitem_T *const val_di = tv_dict_item_alloc_len(S_LEN("_VAL")); 74 val_di->di_tv = val; 75 tv_dict_add(dict, val_di); 76 dict->dv_refcount++; 77 *rettv = (typval_T) { 78 .v_type = VAR_DICT, 79 .v_lock = VAR_UNLOCKED, 80 .vval = { .v_dict = dict }, 81 }; 82 } 83 84 #define DICT_LEN(dict) (dict)->dv_hashtab.ht_used 85 86 /// Helper function used for working with stack vectors used by JSON decoder 87 /// 88 /// @param[in,out] obj New object. Will either be put into the stack (and, 89 /// probably, also inside container) or freed. 90 /// @param[out] stack Object stack. 91 /// @param[out] container_stack Container objects stack. 92 /// @param[in,out] pp Position in string which is currently being parsed. Used 93 /// for error reporting and is also set when decoding is 94 /// restarted due to the necessity of converting regular 95 /// dictionary to a special map. 96 /// @param[out] next_map_special Is set to true when dictionary needs to be 97 /// converted to a special map, otherwise not 98 /// touched. Indicates that decoding has been 99 /// restarted. 100 /// @param[out] didcomma True if previous token was comma. Is set to recorded 101 /// value when decoder is restarted, otherwise unused. 102 /// @param[out] didcolon True if previous token was colon. Is set to recorded 103 /// value when decoder is restarted, otherwise unused. 
104 /// 105 /// @return OK in case of success, FAIL in case of error. 106 static inline int json_decoder_pop(ValuesStackItem obj, ValuesStack *const stack, 107 ContainerStack *const container_stack, const char **const pp, 108 bool *const next_map_special, bool *const didcomma, 109 bool *const didcolon) 110 FUNC_ATTR_NONNULL_ALL 111 { 112 if (kv_size(*container_stack) == 0) { 113 kv_push(*stack, obj); 114 return OK; 115 } 116 ContainerStackItem last_container = kv_last(*container_stack); 117 const char *val_location = *pp; 118 if (obj.val.v_type == last_container.container.v_type 119 // vval.v_list and vval.v_dict should have the same size and offset 120 && ((void *)obj.val.vval.v_list 121 == (void *)last_container.container.vval.v_list)) { 122 (void)kv_pop(*container_stack); 123 val_location = last_container.s; 124 last_container = kv_last(*container_stack); 125 } 126 if (last_container.container.v_type == VAR_LIST) { 127 if (tv_list_len(last_container.container.vval.v_list) != 0 128 && !obj.didcomma) { 129 semsg(_("E474: Expected comma before list item: %s"), val_location); 130 tv_clear(&obj.val); 131 return FAIL; 132 } 133 assert(last_container.special_val == NULL); 134 tv_list_append_owned_tv(last_container.container.vval.v_list, obj.val); 135 } else if (last_container.stack_index == kv_size(*stack) - 2) { 136 if (!obj.didcolon) { 137 semsg(_("E474: Expected colon before dictionary value: %s"), 138 val_location); 139 tv_clear(&obj.val); 140 return FAIL; 141 } 142 ValuesStackItem key = kv_pop(*stack); 143 if (last_container.special_val == NULL) { 144 // These cases should have already been handled. 
145 assert(!(key.is_special_string || key.val.vval.v_string == NULL)); 146 dictitem_T *const obj_di = tv_dict_item_alloc(key.val.vval.v_string); 147 tv_clear(&key.val); 148 if (tv_dict_add(last_container.container.vval.v_dict, obj_di) 149 == FAIL) { 150 abort(); 151 } 152 obj_di->di_tv = obj.val; 153 } else { 154 list_T *const kv_pair = tv_list_alloc(2); 155 tv_list_append_list(last_container.special_val, kv_pair); 156 tv_list_append_owned_tv(kv_pair, key.val); 157 tv_list_append_owned_tv(kv_pair, obj.val); 158 } 159 } else { 160 // Object with key only 161 if (!obj.is_special_string && obj.val.v_type != VAR_STRING) { 162 semsg(_("E474: Expected string key: %s"), *pp); 163 tv_clear(&obj.val); 164 return FAIL; 165 } else if (!obj.didcomma 166 && (last_container.special_val == NULL 167 && (DICT_LEN(last_container.container.vval.v_dict) != 0))) { 168 semsg(_("E474: Expected comma before dictionary key: %s"), val_location); 169 tv_clear(&obj.val); 170 return FAIL; 171 } 172 // Handle special dictionaries 173 if (last_container.special_val == NULL 174 && (obj.is_special_string 175 || obj.val.vval.v_string == NULL 176 || tv_dict_find(last_container.container.vval.v_dict, obj.val.vval.v_string, -1))) { 177 tv_clear(&obj.val); 178 179 // Restart 180 (void)kv_pop(*container_stack); 181 ValuesStackItem last_container_val = 182 kv_A(*stack, last_container.stack_index); 183 while (kv_size(*stack) > last_container.stack_index) { 184 tv_clear(&(kv_pop(*stack).val)); 185 } 186 *pp = last_container.s; 187 *didcomma = last_container_val.didcomma; 188 *didcolon = last_container_val.didcolon; 189 *next_map_special = true; 190 return OK; 191 } 192 kv_push(*stack, obj); 193 } 194 return OK; 195 } 196 197 #define LENP(p, e) \ 198 ((int)((e) - (p))), (p) 199 #define OBJ(obj_tv, is_sp_string, didcomma_, didcolon_) \ 200 ((ValuesStackItem) { \ 201 .is_special_string = (is_sp_string), \ 202 .val = (obj_tv), \ 203 .didcomma = (didcomma_), \ 204 .didcolon = (didcolon_), \ 205 }) 206 207 
#define POP(obj_tv, is_sp_string) \ 208 do { \ 209 if (json_decoder_pop(OBJ(obj_tv, is_sp_string, *didcomma, *didcolon), \ 210 stack, container_stack, \ 211 &p, next_map_special, didcomma, didcolon) \ 212 == FAIL) { \ 213 goto parse_json_string_fail; \ 214 } \ 215 if (*next_map_special) { \ 216 goto parse_json_string_ret; \ 217 } \ 218 } while (0) 219 220 /// Create a new special dictionary that ought to represent a MAP 221 /// 222 /// @param[out] ret_tv Address where new special dictionary is saved. 223 /// @param[in] len Expected number of items to be populated before list 224 /// becomes accessible from Vimscript. It is still valid to 225 /// underpopulate a list, value only controls how many elements 226 /// will be allocated in advance. @see ListLenSpecials. 227 /// 228 /// @return [allocated] list which should contain key-value pairs. Return value 229 /// may be safely ignored. 230 list_T *decode_create_map_special_dict(typval_T *const ret_tv, const ptrdiff_t len) 231 FUNC_ATTR_NONNULL_ALL 232 { 233 list_T *const list = tv_list_alloc(len); 234 tv_list_ref(list); 235 create_special_dict(ret_tv, kMPMap, ((typval_T) { 236 .v_type = VAR_LIST, 237 .v_lock = VAR_UNLOCKED, 238 .vval = { .v_list = list }, 239 })); 240 return list; 241 } 242 243 /// Convert char* string to typval_T 244 /// 245 /// Depending on whether string has (no) NUL bytes, it may use a special 246 /// dictionary, VAR_BLOB, or decode string to VAR_STRING. 247 /// 248 /// @param[in] s String to decode. 249 /// @param[in] len String length. 250 /// @param[in] force_blob whether string always should be decoded as a blob, 251 /// or only when embedded NUL bytes were present 252 /// @param[in] s_allocated If true, then `s` was allocated and can be saved in 253 /// a returned structure. If it is not saved there, it 254 /// will be freed. 255 /// 256 /// @return Decoded string. 
257 typval_T decode_string(const char *const s, const size_t len, bool force_blob, 258 const bool s_allocated) 259 FUNC_ATTR_WARN_UNUSED_RESULT 260 { 261 assert(s != NULL || len == 0); 262 const bool use_blob = force_blob || ((s != NULL) && (memchr(s, NUL, len) != NULL)); 263 if (use_blob) { 264 typval_T tv; 265 tv.v_lock = VAR_UNLOCKED; 266 blob_T *b = tv_blob_alloc_ret(&tv); 267 if (s_allocated) { 268 b->bv_ga.ga_data = (void *)s; 269 b->bv_ga.ga_len = (int)len; 270 b->bv_ga.ga_maxlen = (int)len; 271 } else { 272 ga_concat_len(&b->bv_ga, s, len); 273 } 274 return tv; 275 } 276 return (typval_T) { 277 .v_type = VAR_STRING, 278 .v_lock = VAR_UNLOCKED, 279 .vval = { .v_string = ((s == NULL || s_allocated) ? (char *)s : xmemdupz(s, len)) }, 280 }; 281 } 282 283 /// Parse JSON double-quoted string 284 /// 285 /// @param[in] buf Buffer being converted. 286 /// @param[in] buf_len Length of the buffer. 287 /// @param[in,out] pp Pointer to the start of the string. Must point to '"'. 288 /// Is advanced to the closing '"'. Also see 289 /// json_decoder_pop(), it may set pp to another location 290 /// and alter next_map_special, didcomma and didcolon. 291 /// @param[out] stack Object stack. 292 /// @param[out] container_stack Container objects stack. 293 /// @param[out] next_map_special Is set to true when dictionary is converted 294 /// to a special map, otherwise not touched. 295 /// @param[out] didcomma True if previous token was comma. Is set to recorded 296 /// value when decoder is restarted, otherwise unused. 297 /// @param[out] didcolon True if previous token was colon. Is set to recorded 298 /// value when decoder is restarted, otherwise unused. 299 /// 300 /// @return OK in case of success, FAIL in case of error. 
static inline int parse_json_string(const char *const buf, const size_t buf_len,
                                    const char **const pp, ValuesStack *const stack,
                                    ContainerStack *const container_stack,
                                    bool *const next_map_special, bool *const didcomma,
                                    bool *const didcolon)
  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_ALWAYS_INLINE
{
  const char *const e = buf + buf_len;
  const char *p = *pp;
  size_t len = 0;
  // Skip the opening quote; s is the first byte of the string contents.
  const char *const s = ++p;
  int ret = OK;
  // First pass: validate the string and compute an upper bound (len) for the
  // size of the decoded result. Nothing is allocated yet.
  while (p < e && *p != '"') {
    if (*p == '\\') {
      p++;
      if (p == e) {
        semsg(_("E474: Unfinished escape sequence: %.*s"),
              (int)buf_len, buf);
        goto parse_json_string_fail;
      }
      switch (*p) {
      case 'u':
        // p points at 'u': the four hex digits are p[1]..p[4].
        if (p + 4 >= e) {
          semsg(_("E474: Unfinished unicode escape sequence: %.*s"),
                (int)buf_len, buf);
          goto parse_json_string_fail;
        } else if (!ascii_isxdigit(p[1])
                   || !ascii_isxdigit(p[2])
                   || !ascii_isxdigit(p[3])
                   || !ascii_isxdigit(p[4])) {
          semsg(_("E474: Expected four hex digits after \\u: %.*s"),
                LENP(p - 1, e));
          goto parse_json_string_fail;
        }
        // One UTF-8 character below U+10000 can take up to 3 bytes,
        // above up to 6, but they are encoded using two \u escapes.
        len += 3;
        p += 5;
        break;
      case '\\':
      case '/':
      case '"':
      case 't':
      case 'b':
      case 'n':
      case 'r':
      case 'f':
        // Single-character escapes decode to exactly one byte.
        len++;
        p++;
        break;
      default:
        semsg(_("E474: Unknown escape sequence: %.*s"), LENP(p - 1, e));
        goto parse_json_string_fail;
      }
    } else {
      uint8_t p_byte = (uint8_t)(*p);
      // unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
      if (p_byte < 0x20) {
        semsg(_("E474: ASCII control characters cannot be present "
                "inside string: %.*s"), LENP(p, e));
        goto parse_json_string_fail;
      }
      const int ch = utf_ptr2char(p);
      // All characters above U+007F are encoded using two or more bytes
      // and thus cannot possibly be equal to *p. But utf_ptr2char({0xFF,
      // 0}) will return 0xFF, even though 0xFF cannot start any UTF-8
      // code point at all.
      //
      // The only exception is U+00C3 which is represented as 0xC3 0x83.
      if (ch >= 0x80 && p_byte == ch
          && !(ch == 0xC3 && p + 1 < e && (uint8_t)p[1] == 0x83)) {
        semsg(_("E474: Only UTF-8 strings allowed: %.*s"), LENP(p, e));
        goto parse_json_string_fail;
      } else if (ch > 0x10FFFF) {
        semsg(_("E474: Only UTF-8 code points up to U+10FFFF "
                "are allowed to appear unescaped: %.*s"), LENP(p, e));
        goto parse_json_string_fail;
      }
      const size_t ch_len = (size_t)utf_char2len(ch);
      assert(ch_len == (size_t)(ch ? utf_ptr2len(p) : 1));
      len += ch_len;
      p += ch_len;
    }
  }
  if (p == e || *p != '"') {
    semsg(_("E474: Expected string end: %.*s"), (int)buf_len, buf);
    goto parse_json_string_fail;
  }
  // Second pass: the input between s and p is known to be valid, decode it
  // into a freshly allocated buffer.
  char *str = xmalloc(len + 1);
  // Pending high surrogate from a \uXXXX escape, 0 when there is none.
  int fst_in_pair = 0;
  char *str_end = str;
  // Write out a pending high surrogate that was not followed by a low one.
#define PUT_FST_IN_PAIR(fst_in_pair, str_end) \
  do { \
    if ((fst_in_pair) != 0) { \
      (str_end) += utf_char2bytes(fst_in_pair, (str_end)); \
      (fst_in_pair) = 0; \
    } \
  } while (0)
  for (const char *t = s; t < p; t++) {
    // Anything other than a \u escape ends a pending surrogate pair.
    if (t[0] != '\\' || t[1] != 'u') {
      PUT_FST_IN_PAIR(fst_in_pair, str_end);
    }
    if (*t == '\\') {
      t++;
      switch (*t) {
      case 'u': {
        const char ubuf[] = { t[1], t[2], t[3], t[4] };
        t += 4;
        uvarnumber_T ch;
        vim_str2nr(ubuf, NULL, NULL,
                   STR2NR_HEX | STR2NR_FORCE, NULL, &ch, 4, true, NULL);
        if (SURROGATE_HI_START <= ch && ch <= SURROGATE_HI_END) {
          // High surrogate: remember it and wait for the low half.
          PUT_FST_IN_PAIR(fst_in_pair, str_end);
          fst_in_pair = (int)ch;
        } else if (SURROGATE_LO_START <= ch && ch <= SURROGATE_LO_END
                   && fst_in_pair != 0) {
          // Low surrogate completing a pair: combine into one code point.
          const int full_char = ((int)(ch - SURROGATE_LO_START)
                                 + ((fst_in_pair - SURROGATE_HI_START) << 10)
                                 + SURROGATE_FIRST_CHAR);
          str_end += utf_char2bytes(full_char, str_end);
          fst_in_pair = 0;
        } else {
          PUT_FST_IN_PAIR(fst_in_pair, str_end);
          str_end += utf_char2bytes((int)ch, str_end);
        }
        break;
      }
      case '\\':
      case '/':
      case '"':
      case 't':
      case 'b':
      case 'n':
      case 'r':
      case 'f': {
        // Lookup table indexed by the escape character itself.
        static const char escapes[] = {
          ['\\'] = '\\',
          ['/'] = '/',
          ['"'] = '"',
          ['t'] = TAB,
          ['b'] = BS,
          ['n'] = NL,
          ['r'] = CAR,
          ['f'] = FF,
        };
        *str_end++ = escapes[(int)(*t)];
        break;
      }
      default:
        // Unknown escapes were rejected by the first pass.
        abort();
      }
    } else {
      *str_end++ = *t;
    }
  }
  PUT_FST_IN_PAIR(fst_in_pair, str_end);
#undef PUT_FST_IN_PAIR
  *str_end = NUL;
  // decode_string() is called with s_allocated=true, so str is saved in obj.
  typval_T obj = decode_string(str, (size_t)(str_end - str), false, true);
  // Anything that did not end up as VAR_STRING is flagged as a special string.
  POP(obj, obj.v_type != VAR_STRING);
  goto parse_json_string_ret;
parse_json_string_fail:
  ret = FAIL;
parse_json_string_ret:
  *pp = p;
  return ret;
}

#undef POP

/// Parse JSON number: both floating-point and integer
///
/// Number format: `-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?`.
///
/// @param[in]  buf  Buffer being converted.
/// @param[in]  buf_len  Length of the buffer.
/// @param[in,out]  pp  Pointer to the start of the number. Must point to
///                     a digit or a minus sign. Is advanced to the last
///                     character of the number. Also see json_decoder_pop(), it
///                     may set pp to another location and alter
///                     next_map_special, didcomma and didcolon.
/// @param[out]  stack  Object stack.
/// @param[out]  container_stack  Container objects stack.
/// @param[out]  next_map_special  Is set to true when dictionary is converted
///                                to a special map, otherwise not touched.
/// @param[out]  didcomma  True if previous token was comma. Is set to recorded
///                        value when decoder is restarted, otherwise unused.
/// @param[out]  didcolon  True if previous token was colon. Is set to recorded
///                        value when decoder is restarted, otherwise unused.
///
/// @return OK in case of success, FAIL in case of error.
static inline int parse_json_number(const char *const buf, const size_t buf_len,
                                    const char **const pp, ValuesStack *const stack,
                                    ContainerStack *const container_stack,
                                    bool *const next_map_special, bool *const didcomma,
                                    bool *const didcolon)
  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_ALWAYS_INLINE
{
  const char *const e = buf + buf_len;
  const char *p = *pp;
  int ret = OK;
  const char *const s = p;  // Start of the number, including the minus sign.
  const char *ints = NULL;  // Start of the integer digits.
  const char *fracs = NULL;  // First byte after the decimal dot, if any.
  const char *exps = NULL;  // Start of the exponent digits, if any.
  const char *exps_s = NULL;  // First byte after 'e'/'E' (optional sign), if any.
  if (*p == '-') {
    p++;
  }
  ints = p;
  if (p >= e) {
    goto parse_json_number_check;
  }
  while (p < e && ascii_isdigit(*p)) {
    p++;
  }
  // More than one integer digit and the first one is zero.
  if (p != ints + 1 && *ints == '0') {
    semsg(_("E474: Leading zeroes are not allowed: %.*s"), LENP(s, e));
    goto parse_json_number_fail;
  }
  if (p >= e || p == ints) {
    goto parse_json_number_check;
  }
  if (*p == '.') {
    p++;
    fracs = p;
    while (p < e && ascii_isdigit(*p)) {
      p++;
    }
    if (p >= e || p == fracs) {
      goto parse_json_number_check;
    }
  }
  if (*p == 'e' || *p == 'E') {
    p++;
    exps_s = p;
    if (p < e && (*p == '-' || *p == '+')) {
      p++;
    }
    exps = p;
    while (p < e && ascii_isdigit(*p)) {
      p++;
    }
  }
parse_json_number_check:
  // p is one past the last consumed byte. If it still equals the start of
  // some part of the number, that part contains no digits.
  if (p == ints) {
    semsg(_("E474: Missing number after minus sign: %.*s"), LENP(s, e));
    goto parse_json_number_fail;
  } else if (p == fracs || (fracs != NULL && exps_s == fracs + 1)) {
    semsg(_("E474: Missing number after decimal dot: %.*s"), LENP(s, e));
    goto parse_json_number_fail;
  } else if (p == exps) {
    semsg(_("E474: Missing exponent: %.*s"), LENP(s, e));
    goto parse_json_number_fail;
  }
  typval_T tv = {
    .v_type = VAR_NUMBER,
    .v_lock = VAR_UNLOCKED,
  };
  // Number of bytes the converter is expected to consume.
  const size_t exp_num_len = (size_t)(p - s);
  if (fracs || exps) {
    // Convert floating-point number
    const size_t num_len = string2float(s, &tv.vval.v_float);
    // A mismatch is reported, but the converted value is still used.
    if (exp_num_len != num_len) {
      semsg(_("E685: internal error: while converting number \"%.*s\" "
              "to float string2float consumed %zu bytes in place of %zu"),
            (int)exp_num_len, s, num_len, exp_num_len);
    }
    tv.v_type = VAR_FLOAT;
  } else {
    // Convert integer
    varnumber_T nr;
    int num_len;
    vim_str2nr(s, NULL, &num_len, 0, &nr, NULL, (int)(p - s), true, NULL);
    // A mismatch is reported, but the converted value is still used.
    if ((int)exp_num_len != num_len) {
      semsg(_("E685: internal error: while converting number \"%.*s\" "
              "to integer vim_str2nr consumed %i bytes in place of %zu"),
            (int)exp_num_len, s, num_len, exp_num_len);
    }
    tv.vval.v_number = nr;
  }
  if (json_decoder_pop(OBJ(tv, false, *didcomma, *didcolon),
                       stack, container_stack,
                       &p, next_map_special, didcomma, didcolon) == FAIL) {
    goto parse_json_number_fail;
  }
  // On restart json_decoder_pop() has already repositioned p.
  if (*next_map_special) {
    goto parse_json_number_ret;
  }
  // Step back so that *pp ends up at the last character of the number.
  p--;
  goto parse_json_number_ret;
parse_json_number_fail:
  ret = FAIL;
parse_json_number_ret:
  *pp = p;
  return ret;
}

// Variant of POP for json_decode_string(): operates on its local variables
// and jumps to its labels.
#define POP(obj_tv, is_sp_string) \
  do { \
    if (json_decoder_pop(OBJ(obj_tv, is_sp_string, didcomma, didcolon), \
                         &stack, &container_stack, \
                         &p, &next_map_special, &didcomma, &didcolon) \
        == FAIL) { \
      goto json_decode_string_fail; \
    } \
    if (next_map_special) { \
      goto json_decode_string_cycle_start; \
    } \
  } while (0)

/// Convert JSON string into Vimscript object
///
/// @param[in]  buf  String to convert. UTF-8 encoding is assumed.
/// @param[in]  buf_len  Length of the string.
/// @param[out]  rettv  Location where to save results.
///
/// @return OK in case of success, FAIL otherwise.
int json_decode_string(const char *const buf, const size_t buf_len, typval_T *const rettv)
  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT
{
  const char *p = buf;
  const char *const e = buf + buf_len;
  // Skip leading whitespace.
  while (p < e && (*p == ' ' || *p == TAB || *p == NL || *p == CAR)) {
    p++;
  }
  if (p == e) {
    emsg(_("E474: Attempt to decode a blank string"));
    return FAIL;
  }
  int ret = OK;
  ValuesStack stack = KV_INITIAL_VALUE;
  ContainerStack container_stack = KV_INITIAL_VALUE;
  rettv->v_type = VAR_UNKNOWN;
  bool didcomma = false;
  bool didcolon = false;
  bool next_map_special = false;
  for (; p < e; p++) {
    // Re-entered without advancing p when a dictionary has to be decoded
    // again as a special map; p then points at its opening '{'.
json_decode_string_cycle_start:
    assert(*p == '{' || next_map_special == false);
    switch (*p) {
    case '}':
    case ']': {
      if (kv_size(container_stack) == 0) {
        semsg(_("E474: No container to close: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      }
      ContainerStackItem last_container = kv_last(container_stack);
      if (*p == '}' && last_container.container.v_type != VAR_DICT) {
        semsg(_("E474: Closing list with curly bracket: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      } else if (*p == ']' && last_container.container.v_type != VAR_LIST) {
        semsg(_("E474: Closing dictionary with square bracket: %.*s"),
              LENP(p, e));
        goto json_decode_string_fail;
      } else if (didcomma) {
        semsg(_("E474: Trailing comma: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      } else if (didcolon) {
        semsg(_("E474: Expected value after colon: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      } else if (last_container.stack_index != kv_size(stack) - 1) {
        // Something (a key without a value) is still above the container.
        assert(last_container.stack_index < kv_size(stack) - 1);
        semsg(_("E474: Expected value: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      }
      if (kv_size(stack) == 1) {
        // Outermost container closed: decoding is finished.
        p++;
        (void)kv_pop(container_stack);
        goto json_decode_string_after_cycle;
      } else {
        // Nested container closed: attach it to its parent.
        if (json_decoder_pop(kv_pop(stack), &stack, &container_stack, &p,
                             &next_map_special, &didcomma, &didcolon)
            == FAIL) {
          goto json_decode_string_fail;
        }
        assert(!next_map_special);
        break;
      }
    }
    case ',': {
      if (kv_size(container_stack) == 0) {
        semsg(_("E474: Comma not inside container: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      }
      ContainerStackItem last_container = kv_last(container_stack);
      if (didcomma) {
        semsg(_("E474: Duplicate comma: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      } else if (didcolon) {
        semsg(_("E474: Comma after colon: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      } else if (last_container.container.v_type == VAR_DICT
                 && last_container.stack_index != kv_size(stack) - 1) {
        semsg(_("E474: Using comma in place of colon: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      } else if (last_container.special_val == NULL
                 ? (last_container.container.v_type == VAR_DICT
                    ? (DICT_LEN(last_container.container.vval.v_dict) == 0)
                    : (tv_list_len(last_container.container.vval.v_list)
                       == 0))
                 : (tv_list_len(last_container.special_val) == 0)) {
        // The container is still empty, so there is nothing to separate.
        semsg(_("E474: Leading comma: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      }
      didcomma = true;
      // continue (not break) keeps the flag set for the next token.
      continue;
    }
    case ':': {
      if (kv_size(container_stack) == 0) {
        semsg(_("E474: Colon not inside container: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      }
      ContainerStackItem last_container = kv_last(container_stack);
      if (last_container.container.v_type != VAR_DICT) {
        semsg(_("E474: Using colon not in dictionary: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      } else if (last_container.stack_index != kv_size(stack) - 2) {
        // A colon is only valid when exactly one item (the key) is above
        // the dictionary in the values stack.
        semsg(_("E474: Unexpected colon: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      } else if (didcomma) {
        semsg(_("E474: Colon after comma: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      } else if (didcolon) {
        semsg(_("E474: Duplicate colon: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      }
      didcolon = true;
      // continue (not break) keeps the flag set for the next token.
      continue;
    }
    case ' ':
    case TAB:
    case NL:
    case CAR:
      // Whitespace does not reset didcomma/didcolon.
      continue;
    case 'n':
      if ((p + 3) >= e || strncmp(p + 1, "ull", 3) != 0) {
        semsg(_("E474: Expected null: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      }
      p += 3;
      POP(((typval_T) {
        .v_type = VAR_SPECIAL,
        .v_lock = VAR_UNLOCKED,
        .vval = { .v_special = kSpecialVarNull },
      }), false);
      break;
    case 't':
      if ((p + 3) >= e || strncmp(p + 1, "rue", 3) != 0) {
        semsg(_("E474: Expected true: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      }
      p += 3;
      POP(((typval_T) {
        .v_type = VAR_BOOL,
        .v_lock = VAR_UNLOCKED,
        .vval = { .v_bool = kBoolVarTrue },
      }), false);
      break;
    case 'f':
      if ((p + 4) >= e || strncmp(p + 1, "alse", 4) != 0) {
        semsg(_("E474: Expected false: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      }
      p += 4;
      POP(((typval_T) {
        .v_type = VAR_BOOL,
        .v_lock = VAR_UNLOCKED,
        .vval = { .v_bool = kBoolVarFalse },
      }), false);
      break;
    case '"':
      if (parse_json_string(buf, buf_len, &p, &stack, &container_stack,
                            &next_map_special, &didcomma, &didcolon)
          == FAIL) {
        // Error message was already given
        goto json_decode_string_fail;
      }
      if (next_map_special) {
        goto json_decode_string_cycle_start;
      }
      break;
    case '-':
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
      if (parse_json_number(buf, buf_len, &p, &stack, &container_stack,
                            &next_map_special, &didcomma, &didcolon)
          == FAIL) {
        // Error message was already given
        goto json_decode_string_fail;
      }
      if (next_map_special) {
        goto json_decode_string_cycle_start;
      }
      break;
    case '[': {
      list_T *list = tv_list_alloc(kListLenMayKnow);
      tv_list_ref(list);
      typval_T tv = {
        .v_type = VAR_LIST,
        .v_lock = VAR_UNLOCKED,
        .vval = { .v_list = list },
      };
      // The new container is recorded on both stacks; stack_index is its
      // position in the values stack.
      kv_push(container_stack, ((ContainerStackItem) { .stack_index = kv_size(stack),
                                                       .s = p,
                                                       .container = tv,
                                                       .special_val = NULL }));
      kv_push(stack, OBJ(tv, false, didcomma, didcolon));
      break;
    }
    case '{': {
      typval_T tv;
      list_T *val_list = NULL;
      if (next_map_special) {
        // Restarted by json_decoder_pop(): decode this dictionary as
        // a special map this time.
        next_map_special = false;
        val_list = decode_create_map_special_dict(&tv, kListLenMayKnow);
      } else {
        dict_T *dict = tv_dict_alloc();
        dict->dv_refcount++;
        tv = (typval_T) {
          .v_type = VAR_DICT,
          .v_lock = VAR_UNLOCKED,
          .vval = { .v_dict = dict },
        };
      }
      kv_push(container_stack, ((ContainerStackItem) { .stack_index = kv_size(stack),
                                                       .s = p,
                                                       .container = tv,
                                                       .special_val = val_list }));
      kv_push(stack, OBJ(tv, false, didcomma, didcolon));
      break;
    }
    default:
      semsg(_("E474: Unidentified byte: %.*s"), LENP(p, e));
      goto json_decode_string_fail;
    }
    // Reached only through `break`, i.e. after a value or an opening bracket.
    didcomma = false;
    didcolon = false;
    // A complete top-level value was decoded: stop the main loop.
    if (kv_size(container_stack) == 0) {
      p++;
      break;
    }
  }
json_decode_string_after_cycle:
  // Only whitespace may follow the decoded value.
  for (; p < e; p++) {
    switch (*p) {
    case NL:
    case ' ':
    case TAB:
    case CAR:
      break;
    default:
      semsg(_("E474: Trailing characters: %.*s"), LENP(p, e));
      goto json_decode_string_fail;
    }
  }
  if (kv_size(stack) == 1 && kv_size(container_stack) == 0) {
    *rettv = kv_pop(stack).val;
    goto json_decode_string_ret;
  }
  semsg(_("E474: Unexpected end of input: %.*s"), (int)buf_len, buf);
json_decode_string_fail:
  ret = FAIL;
  // Free every value that was decoded but not stored in a container.
  while (kv_size(stack)) {
    tv_clear(&(kv_pop(stack).val));
  }
json_decode_string_ret:
  kv_destroy(stack);
  kv_destroy(container_stack);
  return ret;
}

#undef LENP
#undef POP

#undef OBJ

#undef DICT_LEN

/// Store an unsigned 64-bit integer into a typval
///
/// Values not greater than VARNUMBER_MAX are saved as VAR_NUMBER. Larger
/// values are saved as a special dictionary of kMPInteger type whose _VAL is
/// a four-item list: the number 1 (presumably the sign, confirm against the
/// special-dictionary documentation), the two highest bits, the next 31 bits
/// and the lowest 31 bits of the value.
///
/// @param[out]  rettv  Location where the result is saved.
/// @param[in]  val  Value to store.
static void positive_integer_to_special_typval(typval_T *rettv, uint64_t val)
{
  if (val <= VARNUMBER_MAX) {
    *rettv = (typval_T) {
      .v_type = VAR_NUMBER,
      .v_lock = VAR_UNLOCKED,
      .vval = { .v_number = (varnumber_T)val },
    };
  } else {
    list_T *const list = tv_list_alloc(4);
    tv_list_ref(list);
    create_special_dict(rettv, kMPInteger, ((typval_T) {
      .v_type = VAR_LIST,
      .v_lock = VAR_UNLOCKED,
      .vval = { .v_list = list },
    }));
    tv_list_append_number(list, 1);
    tv_list_append_number(list, (varnumber_T)((val >> 62) & 0x3));
    tv_list_append_number(list, (varnumber_T)((val >> 31) & 0x7FFFFFFF));
    tv_list_append_number(list, (varnumber_T)(val & 0x7FFFFFFF));
  }
}

/// Start converting one msgpack node to a typval
///
/// Judging by its name and the comments below this is the counterpart of
/// typval_parse_exit(). It determines where the value of `node` has to be
/// stored (node->data[0].p), stores scalar values right away and allocates
/// temporary storage (node->data[1].p) for strings and maps.
///
/// @param[in]  parser  Parser state; parser->data.p is the destination used
///                     for a node without a parent.
/// @param[in,out]  node  Node being entered.
static void typval_parse_enter(mpack_parser_t *parser, mpack_node_t *node)
{
  typval_T *result = NULL;

  mpack_node_t *parent = MPACK_PARENT_NODE(node);
  if (parent) {
    switch (parent->tok.type) {
    case MPACK_TOKEN_ARRAY: {
      // Append a placeholder item to the parent list and fill it in later.
      list_T *list = parent->data[1].p;
      result = tv_list_append_owned_tv(list, (typval_T) { .v_type = VAR_UNKNOWN });
      break;
    }
    case MPACK_TOKEN_MAP: {
      // Parent keeps an array of [key, value] typval pairs.
      // NOTE(review): presumably key_visited is 0 for the key and 1 for
      // the value; confirm against the mpack parser.
      typval_T(*items)[2] = parent->data[1].p;
      result = &items[parent->pos][parent->key_visited];
      break;
    }

    case MPACK_TOKEN_STR:
    case MPACK_TOKEN_BIN:
    case MPACK_TOKEN_EXT:
      // Only data chunks are expected below these; result stays NULL.
      assert(node->tok.type == MPACK_TOKEN_CHUNK);
      break;

    default:
      abort();
    }
  } else {
    result = parser->data.p;
  }

  // for types that are completed in typval_parse_exit
  node->data[0].p = result;
  node->data[1].p = NULL;  // free on error if non-NULL

  switch (node->tok.type) {
  case MPACK_TOKEN_NIL:
    *result = (typval_T) {
      .v_type = VAR_SPECIAL,
      .v_lock = VAR_UNLOCKED,
      .vval = { .v_special = kSpecialVarNull },
    };
    break;
  case MPACK_TOKEN_BOOLEAN:
    *result = (typval_T) {
      .v_type = VAR_BOOL,
      .v_lock = VAR_UNLOCKED,
      .vval = {
        .v_bool = mpack_unpack_boolean(node->tok) ? kBoolVarTrue : kBoolVarFalse
      },
    };
    break;
  case MPACK_TOKEN_SINT: {
    *result = (typval_T) {
      .v_type = VAR_NUMBER,
      .v_lock = VAR_UNLOCKED,
      .vval = { .v_number = mpack_unpack_sint(node->tok) },
    };
    break;
  }
  case MPACK_TOKEN_UINT:
    positive_integer_to_special_typval(result, mpack_unpack_uint(node->tok));
    break;
  case MPACK_TOKEN_FLOAT:
    *result = (typval_T) {
      .v_type = VAR_FLOAT,
      .v_lock = VAR_UNLOCKED,
      .vval = { .v_float = mpack_unpack_float(node->tok) },
    };
    break;

  case MPACK_TOKEN_BIN:
  case MPACK_TOKEN_STR:
  case MPACK_TOKEN_EXT:
    // actually converted in typval_parse_exit after the data chunks
    node->data[1].p = xmallocz(node->tok.length);
    break;
  case MPACK_TOKEN_CHUNK: {
    // Copy this chunk into the parent's buffer at the parent's current offset.
    char *data = parent->data[1].p;
    memcpy(data + parent->pos,
           node->tok.data.chunk_ptr, node->tok.length);
    break;
  }

  case MPACK_TOKEN_ARRAY: {
    list_T *const list = tv_list_alloc((ptrdiff_t)node->tok.length);
    tv_list_ref(list);
    *result = (typval_T) {
      .v_type = VAR_LIST,
      .v_lock = VAR_UNLOCKED,
      .vval = { .v_list = list },
    };
    node->data[1].p = list;
    break;
  }
  case MPACK_TOKEN_MAP:
    // we don't know if this will be safe to convert to a typval dict yet
    node->data[1].p = xmallocz(node->tok.length * 2 * sizeof(typval_T));
    break;
  }
}

/// Free node which was entered but never exited, due to a nested error
///
/// Releases the temporary buffers stored in data[1].p of string, binary,
/// extension and map nodes.
///
/// Don't bother with typvals as these will be GC:d eventually
void typval_parser_error_free(mpack_parser_t *parser)
{
  for (uint32_t i = 0; i < parser->size; i++) {
    mpack_node_t *node = &parser->items[i];
    switch (node->tok.type) {
    case MPACK_TOKEN_BIN:
    case MPACK_TOKEN_STR:
    case MPACK_TOKEN_EXT:
    case MPACK_TOKEN_MAP:
      XFREE_CLEAR(node->data[1].p);
      break;
    default:
      break;
    }
  }
}

static void
typval_parse_exit(mpack_parser_t *parser, mpack_node_t *node) 1034 { 1035 typval_T *result = node->data[0].p; 1036 switch (node->tok.type) { 1037 case MPACK_TOKEN_BIN: 1038 case MPACK_TOKEN_STR: 1039 *result = decode_string(node->data[1].p, node->tok.length, false, true); 1040 node->data[1].p = NULL; 1041 break; 1042 1043 case MPACK_TOKEN_EXT: { 1044 list_T *const list = tv_list_alloc(2); 1045 tv_list_ref(list); 1046 tv_list_append_number(list, node->tok.data.ext_type); 1047 list_T *const ext_val_list = tv_list_alloc(kListLenMayKnow); 1048 tv_list_append_list(list, ext_val_list); 1049 create_special_dict(result, kMPExt, ((typval_T) { .v_type = VAR_LIST, 1050 .v_lock = VAR_UNLOCKED, 1051 .vval = { .v_list = list } })); 1052 // TODO(bfredl): why not use BLOB? 1053 encode_list_write((void *)ext_val_list, node->data[1].p, node->tok.length); 1054 XFREE_CLEAR(node->data[1].p); 1055 } 1056 break; 1057 1058 case MPACK_TOKEN_MAP: { 1059 typval_T(*items)[2] = node->data[1].p; 1060 for (size_t i = 0; i < node->tok.length; i++) { 1061 typval_T *key = &items[i][0]; 1062 if (key->v_type != VAR_STRING 1063 || key->vval.v_string == NULL 1064 || key->vval.v_string[0] == NUL) { 1065 goto msgpack_to_vim_generic_map; 1066 } 1067 } 1068 dict_T *const dict = tv_dict_alloc(); 1069 dict->dv_refcount++; 1070 *result = (typval_T) { 1071 .v_type = VAR_DICT, 1072 .v_lock = VAR_UNLOCKED, 1073 .vval = { .v_dict = dict }, 1074 }; 1075 for (size_t i = 0; i < node->tok.length; i++) { 1076 char *key = items[i][0].vval.v_string; 1077 size_t keylen = strlen(key); 1078 dictitem_T *const di = xmallocz(offsetof(dictitem_T, di_key) + keylen); 1079 memcpy(&di->di_key[0], key, keylen); 1080 di->di_tv.v_type = VAR_UNKNOWN; 1081 if (tv_dict_add(dict, di) == FAIL) { 1082 // Duplicate key: fallback to generic map 1083 TV_DICT_ITER(dict, d, { 1084 d->di_tv.v_type = VAR_SPECIAL; // don't free values in tv_clear(), they will be reused 1085 d->di_tv.vval.v_special = kSpecialVarNull; 1086 }); 1087 tv_clear(result); 
1088 xfree(di); 1089 goto msgpack_to_vim_generic_map; 1090 } 1091 di->di_tv = items[i][1]; 1092 } 1093 for (size_t i = 0; i < node->tok.length; i++) { 1094 xfree(items[i][0].vval.v_string); 1095 } 1096 XFREE_CLEAR(node->data[1].p); 1097 break; 1098 msgpack_to_vim_generic_map: {} 1099 list_T *const list = decode_create_map_special_dict(result, node->tok.length); 1100 for (size_t i = 0; i < node->tok.length; i++) { 1101 list_T *const kv_pair = tv_list_alloc(2); 1102 tv_list_append_list(list, kv_pair); 1103 1104 tv_list_append_owned_tv(kv_pair, items[i][0]); 1105 tv_list_append_owned_tv(kv_pair, items[i][1]); 1106 } 1107 XFREE_CLEAR(node->data[1].p); 1108 break; 1109 } 1110 1111 default: 1112 // other kinds are handled completely in typval_parse_enter 1113 break; 1114 } 1115 } 1116 1117 int mpack_parse_typval(mpack_parser_t *parser, const char **data, size_t *size) 1118 { 1119 return mpack_parse(parser, data, size, typval_parse_enter, typval_parse_exit); 1120 } 1121 1122 int unpack_typval(const char **data, size_t *size, typval_T *ret) 1123 { 1124 ret->v_type = VAR_UNKNOWN; 1125 mpack_parser_t parser; 1126 mpack_parser_init(&parser, 0); 1127 parser.data.p = ret; 1128 int status = mpack_parse_typval(&parser, data, size); 1129 if (status != MPACK_OK) { 1130 typval_parser_error_free(&parser); 1131 tv_clear(ret); 1132 } 1133 return status; 1134 }