unpacker.c (19354B)
1 #include <assert.h> 2 #include <stdbool.h> 3 #include <stdlib.h> 4 #include <uv.h> 5 6 #include "klib/kvec.h" 7 #include "mpack/conv.h" 8 #include "nvim/api/private/helpers.h" 9 #include "nvim/ascii_defs.h" 10 #include "nvim/grid.h" 11 #include "nvim/macros_defs.h" 12 #include "nvim/memory.h" 13 #include "nvim/msgpack_rpc/channel_defs.h" 14 #include "nvim/msgpack_rpc/unpacker.h" 15 #include "nvim/strings.h" 16 #include "nvim/ui_client.h" 17 18 #include "msgpack_rpc/unpacker.c.generated.h" 19 20 Object unpack(const char *data, size_t size, Arena *arena, Error *err) 21 { 22 Unpacker unpacker; 23 mpack_parser_init(&unpacker.parser, 0); 24 unpacker.parser.data.p = &unpacker; 25 unpacker.arena = *arena; 26 27 int result = mpack_parse(&unpacker.parser, &data, &size, 28 api_parse_enter, api_parse_exit); 29 30 *arena = unpacker.arena; 31 32 if (result == MPACK_NOMEM) { 33 api_set_error(err, kErrorTypeException, "object was too deep to unpack"); 34 } else if (result == MPACK_EOF) { 35 api_set_error(err, kErrorTypeException, "incomplete msgpack string"); 36 } else if (result == MPACK_ERROR) { 37 api_set_error(err, kErrorTypeException, "invalid msgpack string"); 38 } else if (result == MPACK_OK && size) { 39 api_set_error(err, kErrorTypeException, "trailing data in msgpack string"); 40 } 41 42 return unpacker.result; 43 } 44 45 static void api_parse_enter(mpack_parser_t *parser, mpack_node_t *node) 46 { 47 Unpacker *p = parser->data.p; 48 Object *result = NULL; 49 String *key_location = NULL; 50 51 mpack_node_t *parent = MPACK_PARENT_NODE(node); 52 if (parent) { 53 switch (parent->tok.type) { 54 case MPACK_TOKEN_ARRAY: { 55 Object *obj = parent->data[0].p; 56 result = &kv_A(obj->data.array, parent->pos); 57 break; 58 } 59 case MPACK_TOKEN_MAP: { 60 Object *obj = parent->data[0].p; 61 KeyValuePair *kv = &kv_A(obj->data.dict, parent->pos); 62 if (!parent->key_visited) { 63 // TODO(bfredl): when implementing interrupt parse on error, 64 // stop parsing here when node is not a STR/BIN 65 kv->key = (String)STRING_INIT; 66 key_location = &kv->key; 67 } 68 result = &kv->value; 69 break; 70 } 71 72 case MPACK_TOKEN_STR: 73 case MPACK_TOKEN_BIN: 74 case MPACK_TOKEN_EXT: 75 assert(node->tok.type == MPACK_TOKEN_CHUNK); 76 break; 77 78 default: 79 abort(); 80 } 81 } else { 82 result = &p->result; 83 } 84 85 switch (node->tok.type) { 86 case MPACK_TOKEN_NIL: 87 *result = NIL; 88 break; 89 case MPACK_TOKEN_BOOLEAN: 90 *result = BOOLEAN_OBJ(mpack_unpack_boolean(node->tok)); 91 break; 92 case MPACK_TOKEN_SINT: 93 *result = INTEGER_OBJ(mpack_unpack_sint(node->tok)); 94 break; 95 case MPACK_TOKEN_UINT: 96 *result = INTEGER_OBJ((Integer)mpack_unpack_uint(node->tok)); 97 break; 98 case MPACK_TOKEN_FLOAT: 99 *result = FLOAT_OBJ(mpack_unpack_float(node->tok)); 100 break; 101 102 case MPACK_TOKEN_BIN: 103 case MPACK_TOKEN_STR: { 104 char *mem = arena_alloc(&p->arena, node->tok.length + 1, false); 105 mem[node->tok.length] = NUL; 106 String str = { .data = mem, .size = node->tok.length }; 107 if (key_location) { 108 *key_location = str; 109 } else { 110 *result = STRING_OBJ(str); 111 } 112 node->data[0].p = str.data; 113 break; 114 } 115 case MPACK_TOKEN_EXT: 116 // handled in chunk; but save result location 117 node->data[0].p = result; 118 break; 119 case MPACK_TOKEN_CHUNK: 120 assert(parent); 121 if (parent->tok.type == MPACK_TOKEN_STR || parent->tok.type == MPACK_TOKEN_BIN) { 122 char *data = parent->data[0].p; 123 memcpy(data + parent->pos, 124 node->tok.data.chunk_ptr, node->tok.length); 125 } else { 126 Object *res = parent->data[0].p; 127 128 size_t endlen = parent->pos + node->tok.length; 129 if (endlen > MAX_EXT_LEN) { 130 *res = NIL; 131 break; 132 } 133 memcpy(p->ext_buf + parent->pos, 134 node->tok.data.chunk_ptr, node->tok.length); 135 if (parent->pos + node->tok.length < parent->tok.length) { 136 break; // EOF, let's get back to it later 137 } 138 const char *buf = p->ext_buf; 139 size_t size = parent->tok.length; 140 mpack_token_t ext_tok; 141 int status = mpack_rtoken(&buf, &size, &ext_tok); 142 if (status || ext_tok.type != MPACK_TOKEN_UINT) { 143 // TODO(bfredl): once we fixed memory management, we can set 144 // p->unpack_error and a flag like p->interrupted 145 *res = NIL; 146 break; 147 } 148 int ext_type = parent->tok.data.ext_type; 149 if (0 <= ext_type && ext_type <= EXT_OBJECT_TYPE_MAX) { 150 res->type = (ObjectType)(ext_type + EXT_OBJECT_TYPE_SHIFT); 151 res->data.integer = (int64_t)mpack_unpack_uint(ext_tok); 152 } else { 153 *res = NIL; 154 break; 155 } 156 } 157 break; 158 159 case MPACK_TOKEN_ARRAY: { 160 Array arr = KV_INITIAL_VALUE; 161 kv_fixsize_arena(&p->arena, arr, node->tok.length); 162 kv_size(arr) = node->tok.length; 163 *result = ARRAY_OBJ(arr); 164 node->data[0].p = result; 165 break; 166 } 167 case MPACK_TOKEN_MAP: { 168 Dict dict = KV_INITIAL_VALUE; 169 kv_fixsize_arena(&p->arena, dict, node->tok.length); 170 kv_size(dict) = node->tok.length; 171 *result = DICT_OBJ(dict); 172 node->data[0].p = result; 173 break; 174 } 175 } 176 } 177 178 static void api_parse_exit(mpack_parser_t *parser, mpack_node_t *node) 179 { 180 } 181 182 void unpacker_init(Unpacker *p) 183 { 184 mpack_parser_init(&p->parser, 0); 185 p->parser.data.p = p; 186 mpack_tokbuf_init(&p->reader); 187 p->unpack_error = ERROR_INIT; 188 189 p->arena = (Arena)ARENA_EMPTY; 190 191 p->has_grid_line_event = false; 192 } 193 194 void unpacker_teardown(Unpacker *p) 195 { 196 arena_mem_free(arena_finish(&p->arena)); 197 } 198 199 bool unpacker_parse_header(Unpacker *p) 200 { 201 mpack_token_t tok; 202 int result; 203 204 const char *data = p->read_ptr; 205 size_t size = p->read_size; 206 207 assert(!ERROR_SET(&p->unpack_error)); 208 209 // TODO(bfredl): eliminate p->reader, we can use mpack_rtoken directly 210 #define NEXT(tok) \ 211 result = mpack_read(&p->reader, &data, &size, &tok); \ 212 if (result) { goto error; } 213 214 NEXT(tok); 215 if (tok.type != MPACK_TOKEN_ARRAY || tok.length < 3 || tok.length > 4) { 216 goto error; 217 } 218 size_t array_length = tok.length; 219 220 NEXT(tok); 221 if (tok.type != MPACK_TOKEN_UINT) { 222 goto error; 223 } 224 uint32_t type = (uint32_t)mpack_unpack_uint(tok); 225 if ((array_length == 3) ? type != 2 : (type >= 2)) { 226 goto error; 227 } 228 p->type = (MessageType)type; 229 p->request_id = 0; 230 231 if (p->type != kMessageTypeNotification) { 232 NEXT(tok); 233 if (tok.type != MPACK_TOKEN_UINT) { 234 goto error; 235 } 236 p->request_id = (uint32_t)mpack_unpack_uint(tok); 237 } 238 239 if (p->type != kMessageTypeResponse) { 240 NEXT(tok); 241 if ((tok.type != MPACK_TOKEN_STR && tok.type != MPACK_TOKEN_BIN) 242 || tok.length > 100) { 243 goto error; 244 } 245 p->method_name_len = tok.length; 246 247 if (p->method_name_len > 0) { 248 NEXT(tok); 249 assert(tok.type == MPACK_TOKEN_CHUNK); 250 } 251 if (tok.length < p->method_name_len) { 252 result = MPACK_EOF; 253 goto error; 254 } 255 // if this fails, p->handler.fn will be NULL 256 p->handler = msgpack_rpc_get_handler_for(tok.length ? tok.data.chunk_ptr : "", 257 tok.length, &p->unpack_error); 258 } 259 260 p->read_ptr = data; 261 p->read_size = size; 262 return true; 263 #undef NEXT 264 265 error: 266 if (result == MPACK_EOF) { 267 // recover later by retrying from scratch 268 // when more data is available. 269 mpack_tokbuf_init(&p->reader); 270 } else { 271 api_set_error(&p->unpack_error, kErrorTypeValidation, "failed to decode msgpack"); 272 p->state = -1; 273 } 274 return false; 275 } 276 277 // BASIC BITCH STATE MACHINE 278 // 279 // With some basic assumptions, we can parse the overall structure of msgpack-rpc 280 // messages with a hand-rolled FSM of just 3 states (<x> = p->state): 281 // 282 // <0>[0, request_id, method_name, <2>args] 283 // <0>[1, request_id, <1>err, <2>result] 284 // <0>[2, method_name, <2>args] 285 // 286 // The assumption here is that the header of the message, which we define as the 287 // initial array head, the kind integer, request_id and/or method name (when needed), 288 // is relatively small, just ~10 bytes + the method name. Thus we can simply refuse 289 // to advance the stream beyond the header until it can be parsed in its entirety. 290 // 291 // Later on, we want to specialize state 2 into more sub-states depending 292 // on the specific method. "nvim_exec_lua" should just decode direct into lua 293 // objects. For the moment "redraw/grid_line" uses a hand-rolled decoder, 294 // to avoid a blizzard of small objects for each screen cell. 295 // 296 // <0>[2, "redraw", <10>[<11>["method", <12>[args], <12>[args], ...], <11>[...], ...]] 297 // 298 // Where [args] gets unpacked as an Array. Note: first {11} is not saved as a state. 299 // 300 // When method is "grid_line", we furthermore decode a cell at a time like: 301 // 302 // <0>[2, "redraw", <10>[<11>["grid_line", <14>[g, r, c, [<15>[cell], <15>[cell], ...], <16>wrap]], <11>[...], ...]] 303 // 304 // where [cell] is [char, repeat, attr], where 'repeat' and 'attr' is optional 305 306 bool unpacker_advance(Unpacker *p) 307 { 308 assert(p->state >= 0); 309 p->has_grid_line_event = false; 310 if (p->state == 0) { 311 if (!unpacker_parse_header(p)) { 312 return false; 313 } 314 if (p->type == kMessageTypeNotification && p->handler.fn == handle_ui_client_redraw) { 315 p->type = kMessageTypeRedrawEvent; 316 p->state = 10; 317 } else { 318 p->state = p->type == kMessageTypeResponse ? 1 : 2; 319 p->arena = (Arena)ARENA_EMPTY; 320 } 321 } 322 323 if (p->state >= 10 && p->state != 13) { 324 if (!unpacker_parse_redraw(p)) { 325 return false; 326 } 327 328 if (p->state == 16) { 329 // grid_line event already unpacked 330 p->has_grid_line_event = true; 331 goto done; 332 } else { 333 assert(p->state == 12); 334 // unpack other ui events using mpack_parse() 335 p->arena = (Arena)ARENA_EMPTY; 336 p->state = 13; 337 } 338 } 339 340 int result; 341 342 rerun: 343 result = mpack_parse(&p->parser, &p->read_ptr, &p->read_size, 344 api_parse_enter, api_parse_exit); 345 346 if (result == MPACK_EOF) { 347 return false; 348 } else if (result != MPACK_OK) { 349 api_set_error(&p->unpack_error, kErrorTypeValidation, "failed to parse msgpack"); 350 p->state = -1; 351 return false; 352 } 353 354 done: 355 switch (p->state) { 356 case 1: 357 p->error = p->result; 358 p->state = 2; 359 goto rerun; 360 case 2: 361 p->state = 0; 362 return true; 363 case 13: 364 case 16: 365 p->ncalls--; 366 if (p->ncalls > 0) { 367 p->state = (p->state == 16) ? 14 : 12; 368 } else if (p->nevents > 0) { 369 p->state = 11; 370 } else { 371 p->state = 0; 372 } 373 return true; 374 default: 375 abort(); 376 } 377 } 378 379 bool unpacker_parse_redraw(Unpacker *p) 380 { 381 mpack_token_t tok; 382 int result; 383 384 const char *data = p->read_ptr; 385 size_t size = p->read_size; 386 GridLineEvent *g = &p->grid_line_event; 387 388 #define NEXT_TYPE(tok, typ) \ 389 result = mpack_rtoken(&data, &size, &tok); \ 390 if (result == MPACK_EOF) { \ 391 return false; \ 392 } else if (result || (tok.type != typ \ 393 && !(typ == MPACK_TOKEN_STR && tok.type == MPACK_TOKEN_BIN) \ 394 && !(typ == MPACK_TOKEN_SINT && tok.type == MPACK_TOKEN_UINT))) { \ 395 p->state = -1; \ 396 return false; \ 397 } 398 399 switch (p->state) { 400 case 10: 401 NEXT_TYPE(tok, MPACK_TOKEN_ARRAY); 402 p->nevents = (int)tok.length; 403 FALLTHROUGH; 404 405 case 11: 406 NEXT_TYPE(tok, MPACK_TOKEN_ARRAY); 407 p->ncalls = (int)tok.length; 408 409 if (p->ncalls-- == 0) { 410 p->state = -1; 411 return false; 412 } 413 414 NEXT_TYPE(tok, MPACK_TOKEN_STR); 415 if (tok.length > size) { 416 return false; 417 } 418 419 p->ui_handler = ui_client_get_redraw_handler(data, tok.length, NULL); 420 data += tok.length; 421 size -= tok.length; 422 423 p->nevents--; 424 p->read_ptr = data; 425 p->read_size = size; 426 if (p->ui_handler.fn != ui_client_event_grid_line) { 427 p->state = 12; 428 return true; 429 } else { 430 p->state = 14; 431 p->arena = (Arena)ARENA_EMPTY; 432 } 433 FALLTHROUGH; 434 435 case 14: 436 NEXT_TYPE(tok, MPACK_TOKEN_ARRAY); 437 int eventarrsize = (int)tok.length; 438 if (eventarrsize != 5) { 439 p->state = -1; 440 return false; 441 } 442 443 for (int i = 0; i < 3; i++) { 444 NEXT_TYPE(tok, MPACK_TOKEN_UINT); 445 g->args[i] = (int)tok.data.value.lo; 446 } 447 448 NEXT_TYPE(tok, MPACK_TOKEN_ARRAY); 449 g->ncells = (int)tok.length; 450 g->icell = 0; 451 g->coloff = 0; 452 g->cur_attr = -1; 453 454 p->read_ptr = data; 455 p->read_size = size; 456 p->state = 15; 457 FALLTHROUGH; 458 459 case 15: 460 for (; g->icell != g->ncells; g->icell++) { 461 assert(g->icell < g->ncells); 462 463 NEXT_TYPE(tok, MPACK_TOKEN_ARRAY); 464 int cellarrsize = (int)tok.length; 465 if (cellarrsize < 1 || cellarrsize > 3) { 466 p->state = -1; 467 return false; 468 } 469 470 NEXT_TYPE(tok, MPACK_TOKEN_STR); 471 if (tok.length > size) { 472 return false; 473 } 474 475 const char *cellbuf = data; 476 size_t cellsize = tok.length; 477 data += cellsize; 478 size -= cellsize; 479 480 if (cellarrsize >= 2) { 481 NEXT_TYPE(tok, MPACK_TOKEN_SINT); 482 g->cur_attr = (int)tok.data.value.lo; 483 } 484 485 int repeat = 1; 486 if (cellarrsize >= 3) { 487 NEXT_TYPE(tok, MPACK_TOKEN_UINT); 488 repeat = (int)tok.data.value.lo; 489 } 490 491 g->clear_width = 0; 492 if (g->icell == g->ncells - 1 && cellsize == 1 && cellbuf[0] == ' ' && repeat > 1) { 493 g->clear_width = repeat; 494 } else { 495 schar_T sc = schar_from_buf(cellbuf, cellsize); 496 for (int r = 0; r < repeat; r++) { 497 if (g->coloff >= (int)grid_line_buf_size) { 498 p->state = -1; 499 return false; 500 } 501 grid_line_buf_char[g->coloff] = sc; 502 grid_line_buf_attr[g->coloff++] = g->cur_attr; 503 } 504 } 505 506 p->read_ptr = data; 507 p->read_size = size; 508 } 509 p->state = 16; 510 FALLTHROUGH; 511 512 case 16: 513 NEXT_TYPE(tok, MPACK_TOKEN_BOOLEAN); 514 g->wrap = mpack_unpack_boolean(tok); 515 p->read_ptr = data; 516 p->read_size = size; 517 return true; 518 519 case 12: 520 return true; 521 522 default: 523 abort(); 524 } 525 } 526 527 /// Requires a complete string. safe to use e.g. in shada as we have loaded a 528 /// complete shada item into a linear buffer. 529 /// 530 /// Data and size are preserved in cause of failure. 531 /// 532 /// @return "data" is NULL only when failure (non-null data and size=0 for 533 /// valid empty string) 534 String unpack_string(const char **data, size_t *size) 535 { 536 const char *data2 = *data; 537 size_t size2 = *size; 538 mpack_token_t tok; 539 540 // TODO(bfredl): this code is hot a f, specialize! 541 int result = mpack_rtoken(&data2, &size2, &tok); 542 if (result || (tok.type != MPACK_TOKEN_STR && tok.type != MPACK_TOKEN_BIN)) { 543 return (String)STRING_INIT; 544 } 545 if (*size < tok.length) { 546 // result = MPACK_EOF; 547 return (String)STRING_INIT; 548 } 549 (*data) = data2 + tok.length; 550 (*size) = size2 - tok.length; 551 return cbuf_as_string((char *)data2, tok.length); 552 } 553 554 /// @return -1 if not an array or EOF. otherwise size of valid array 555 ssize_t unpack_array(const char **data, size_t *size) 556 { 557 // TODO(bfredl): this code is hot, specialize! 558 mpack_token_t tok; 559 int result = mpack_rtoken(data, size, &tok); 560 if (result || tok.type != MPACK_TOKEN_ARRAY) { 561 return -1; 562 } 563 return tok.length; 564 } 565 566 /// does not keep "data" untouched on failure 567 bool unpack_integer(const char **data, size_t *size, Integer *res) 568 { 569 mpack_token_t tok; 570 int result = mpack_rtoken(data, size, &tok); 571 if (result) { 572 return false; 573 } 574 return unpack_uint_or_sint(tok, res); 575 } 576 577 bool unpack_uint_or_sint(mpack_token_t tok, Integer *res) 578 { 579 if (tok.type == MPACK_TOKEN_UINT) { 580 *res = (Integer)mpack_unpack_uint(tok); 581 return true; 582 } else if (tok.type == MPACK_TOKEN_SINT) { 583 *res = (Integer)mpack_unpack_sint(tok); 584 return true; 585 } 586 return false; 587 } 588 589 static void parse_nop(mpack_parser_t *parser, mpack_node_t *node) 590 { 591 } 592 593 int unpack_skip(const char **data, size_t *size) 594 { 595 mpack_parser_t parser; 596 mpack_parser_init(&parser, 0); 597 598 return mpack_parse(&parser, data, size, parse_nop, parse_nop); 599 } 600 601 void push_additional_data(AdditionalDataBuilder *ad, const char *data, size_t size) 602 { 603 if (kv_size(*ad) == 0) { 604 AdditionalData init = { 0 }; 605 kv_concat_len(*ad, &init, sizeof(init)); 606 } 607 AdditionalData *a = (AdditionalData *)ad->items; 608 a->nitems++; 609 a->nbytes += (uint32_t)size; 610 kv_concat_len(*ad, data, size); 611 } 612 613 // currently only used for shada, so not re-entrant like unpacker_parse_redraw 614 bool unpack_keydict(void *retval, FieldHashfn hashy, AdditionalDataBuilder *ad, const char **data, 615 size_t *restrict size, char **error) 616 { 617 OptKeySet *ks = (OptKeySet *)retval; 618 mpack_token_t tok; 619 620 int result = mpack_rtoken(data, size, &tok); 621 if (result || tok.type != MPACK_TOKEN_MAP) { 622 *error = xstrdup("is not a dict"); 623 return false; 624 } 625 626 size_t map_size = tok.length; 627 628 for (size_t i = 0; i < map_size; i++) { 629 const char *item_start = *data; 630 // TODO(bfredl): we could specialize a hot path for FIXSTR here 631 String key = unpack_string(data, size); 632 if (!key.data) { 633 *error = arena_printf(NULL, "has key value which is not a string").data; 634 return false; 635 } else if (key.size == 0) { 636 *error = arena_printf(NULL, "has empty key").data; 637 return false; 638 } 639 KeySetLink *field = hashy(key.data, key.size); 640 641 if (!field) { 642 int status = unpack_skip(data, size); 643 if (status) { 644 return false; 645 } 646 647 if (ad) { 648 push_additional_data(ad, item_start, (size_t)(*data - item_start)); 649 } 650 continue; 651 } 652 653 assert(field->opt_index >= 0); 654 uint64_t flag = (1ULL << field->opt_index); 655 if (ks->is_set_ & flag) { 656 *error = xstrdup("duplicate key"); 657 return false; 658 } 659 ks->is_set_ |= flag; 660 661 char *mem = ((char *)retval + field->ptr_off); 662 switch (field->type) { 663 case kObjectTypeBoolean: 664 if (*size == 0 || (**data & 0xfe) != 0xc2) { 665 *error = arena_printf(NULL, "has %.*s key value which is not a boolean", (int)key.size, 666 key.data).data; 667 return false; 668 } 669 *(Boolean *)mem = **data & 0x01; 670 (*data)++; (*size)--; 671 break; 672 673 case kObjectTypeInteger: 674 if (!unpack_integer(data, size, (Integer *)mem)) { 675 *error = arena_printf(NULL, "has %.*s key value which is not an integer", (int)key.size, 676 key.data).data; 677 return false; 678 } 679 break; 680 681 case kObjectTypeString: { 682 String val = unpack_string(data, size); 683 if (!val.data) { 684 *error = arena_printf(NULL, "has %.*s key value which is not a binary", (int)key.size, 685 key.data).data; 686 return false; 687 } 688 *(String *)mem = val; 689 break; 690 } 691 692 case kUnpackTypeStringArray: { 693 ssize_t len = unpack_array(data, size); 694 if (len < 0) { 695 *error = arena_printf(NULL, "has %.*s key with non-array value", (int)key.size, 696 key.data).data; 697 return false; 698 } 699 StringArray *a = (StringArray *)mem; 700 kv_ensure_space(*a, (size_t)len); 701 for (size_t j = 0; j < (size_t)len; j++) { 702 String item = unpack_string(data, size); 703 if (!item.data) { 704 *error = arena_printf(NULL, "has %.*s array with non-binary value", (int)key.size, 705 key.data).data; 706 return false; 707 } 708 kv_push(*a, item); 709 } 710 break; 711 } 712 713 default: 714 abort(); // not supported 715 } 716 } 717 718 return true; 719 }