neovim

Neovim text editor
git clone https://git.dasho.dev/neovim.git
Log | Files | Refs | README

decode.c (38328B)


      1 #include <assert.h>
      2 #include <stdbool.h>
      3 #include <stddef.h>
      4 #include <stdint.h>
      5 #include <stdlib.h>
      6 #include <string.h>
      7 
      8 #include "klib/kvec.h"
      9 #include "mpack/conv.h"
     10 #include "mpack/mpack_core.h"
     11 #include "mpack/object.h"
     12 #include "nvim/ascii_defs.h"
     13 #include "nvim/charset.h"
     14 #include "nvim/eval.h"
     15 #include "nvim/eval/decode.h"
     16 #include "nvim/eval/encode.h"
     17 #include "nvim/eval/typval.h"
     18 #include "nvim/eval/typval_defs.h"
     19 #include "nvim/eval/vars.h"
     20 #include "nvim/eval_defs.h"
     21 #include "nvim/garray.h"
     22 #include "nvim/gettext_defs.h"
     23 #include "nvim/macros_defs.h"
     24 #include "nvim/mbyte.h"
     25 #include "nvim/memory.h"
     26 #include "nvim/message.h"
     27 #include "nvim/vim_defs.h"
     28 
     29 /// One open container (list or dictionary) tracked by the JSON decoder
        ///
        /// Items are kept in a ContainerStack; the last item is the innermost
        /// container that is still being filled.
     30 typedef struct {
     31  size_t stack_index;   ///< Index of this container's entry in the values stack.
     32  list_T *special_val;  ///< _VAL key contents for special maps: the list that
     33                        ///< receives [key, value] pairs. NULL when the
     34                        ///< container is not a special dictionary.
     35  const char *s;        ///< Location in the input where the container starts.
     36  typval_T container;   ///< Container. Either VAR_LIST, VAR_DICT or VAR_LIST
     37                        ///< which is _VAL from special dictionary.
     38 } ContainerStackItem;
     39 
     40 /// One decoded value waiting on the values stack (see ValuesStack)
     41 typedef struct {
     42  bool is_special_string;  ///< True when the value came from a JSON string that
     43                           ///< was not decoded to a plain VAR_STRING.
     44  bool didcomma;           ///< True if the token before this value was a comma.
     45  bool didcolon;           ///< True if the token before this value was a colon.
     46  typval_T val;            ///< Actual value.
     47 } ValuesStackItem;
     48 
     49 /// Vector containing values not yet saved in any container
        ///
        /// This includes dictionary keys that are still waiting for their value.
     50 typedef kvec_t(ValuesStackItem) ValuesStack;
     51 
     52 /// Vector of open containers; each item is nested inside the one before it
     53 typedef kvec_t(ContainerStackItem) ContainerStack;
     54 
     55 #include "eval/decode.c.generated.h"
     56 
     57 /// Create special dictionary
     58 ///
     59 /// @param[out]  rettv  Location where created dictionary will be saved.
     60 /// @param[in]  type  Type of the dictionary.
     61 /// @param[in]  val  Value associated with the _VAL key.
     62 static inline void create_special_dict(typval_T *const rettv, const MessagePackType type,
     63                                       typval_T val)
     64  FUNC_ATTR_NONNULL_ALL
     65 {
     66  dict_T *const dict = tv_dict_alloc();
     67  dictitem_T *const type_di = tv_dict_item_alloc_len(S_LEN("_TYPE"));
     68  type_di->di_tv.v_type = VAR_LIST;
     69  type_di->di_tv.v_lock = VAR_UNLOCKED;
     70  type_di->di_tv.vval.v_list = (list_T *)eval_msgpack_type_lists[type];
     71  tv_list_ref(type_di->di_tv.vval.v_list);
     72  tv_dict_add(dict, type_di);
     73  dictitem_T *const val_di = tv_dict_item_alloc_len(S_LEN("_VAL"));
     74  val_di->di_tv = val;
     75  tv_dict_add(dict, val_di);
     76  dict->dv_refcount++;
     77  *rettv = (typval_T) {
     78    .v_type = VAR_DICT,
     79    .v_lock = VAR_UNLOCKED,
     80    .vval = { .v_dict = dict },
     81  };
     82 }
     83 
     84 #define DICT_LEN(dict) (dict)->dv_hashtab.ht_used
     85 
     86 /// Helper function used for working with stack vectors used by JSON decoder
     87 ///
        /// Puts a freshly decoded value where it belongs:
        /// - with no open container the value is pushed onto `stack`;
        /// - if the value is the innermost open container itself, that container
        ///   is removed from `container_stack` and stored in its parent;
        /// - inside a list the value is appended to the list;
        /// - inside a dictionary the value is either remembered as a key (pushed
        ///   onto `stack`) or, when a key is already pending, stored under it.
        /// A key that a regular dictionary cannot hold makes the function discard
        /// the dictionary and request a restart of decoding from its start.
        ///
     88 /// @param[in,out]  obj  New object. Will either be put into the stack (and,
     89 ///                      probably, also inside container) or freed.
     90 /// @param[out]  stack  Object stack.
     91 /// @param[out]  container_stack  Container objects stack.
     92 /// @param[in,out]  pp  Position in string which is currently being parsed. Used
     93 ///                     for error reporting and is also set when decoding is
     94 ///                     restarted due to the necessity of converting regular
     95 ///                     dictionary to a special map.
     96 /// @param[out]  next_map_special  Is set to true when dictionary needs to be
     97 ///                                converted to a special map, otherwise not
     98 ///                                touched. Indicates that decoding has been
     99 ///                                restarted.
    100 /// @param[out]  didcomma  True if previous token was comma. Is set to recorded
    101 ///                        value when decoder is restarted, otherwise unused.
    102 /// @param[out]  didcolon  True if previous token was colon. Is set to recorded
    103 ///                        value when decoder is restarted, otherwise unused.
    104 ///
    105 /// @return OK in case of success, FAIL in case of error.
    106 static inline int json_decoder_pop(ValuesStackItem obj, ValuesStack *const stack,
    107                                   ContainerStack *const container_stack, const char **const pp,
    108                                   bool *const next_map_special, bool *const didcomma,
    109                                   bool *const didcolon)
    110  FUNC_ATTR_NONNULL_ALL
    111 {
         // No open container: obj is a top-level value, only record it.
    112  if (kv_size(*container_stack) == 0) {
    113    kv_push(*stack, obj);
    114    return OK;
    115  }
    116  ContainerStackItem last_container = kv_last(*container_stack);
    117  const char *val_location = *pp;
         // obj is the innermost open container itself: drop it from
         // container_stack and continue with its parent. Errors are then reported
         // at the place where the container started.
    118  if (obj.val.v_type == last_container.container.v_type
    119      // vval.v_list and vval.v_dict should have the same size and offset
    120      && ((void *)obj.val.vval.v_list
    121          == (void *)last_container.container.vval.v_list)) {
    122    (void)kv_pop(*container_stack);
    123    val_location = last_container.s;
    124    last_container = kv_last(*container_stack);
    125  }
    126  if (last_container.container.v_type == VAR_LIST) {
           // List parent: every item but the first must follow a comma.
    127    if (tv_list_len(last_container.container.vval.v_list) != 0
    128        && !obj.didcomma) {
    129      semsg(_("E474: Expected comma before list item: %s"), val_location);
    130      tv_clear(&obj.val);
    131      return FAIL;
    132    }
    133    assert(last_container.special_val == NULL);
    134    tv_list_append_owned_tv(last_container.container.vval.v_list, obj.val);
    135  } else if (last_container.stack_index == kv_size(*stack) - 2) {
           // Dictionary parent with a key on top of the stack: obj is the value
           // for that key.
    136    if (!obj.didcolon) {
    137      semsg(_("E474: Expected colon before dictionary value: %s"),
    138            val_location);
    139      tv_clear(&obj.val);
    140      return FAIL;
    141    }
    142    ValuesStackItem key = kv_pop(*stack);
    143    if (last_container.special_val == NULL) {
    144      // These cases should have already been handled.
    145      assert(!(key.is_special_string || key.val.vval.v_string == NULL));
    146      dictitem_T *const obj_di = tv_dict_item_alloc(key.val.vval.v_string);
    147      tv_clear(&key.val);
             // NOTE(review): keys already present in the dictionary are diverted
             // to the restart branch below before they are pushed, so a failure
             // here is treated as fatal.
    148      if (tv_dict_add(last_container.container.vval.v_dict, obj_di)
    149          == FAIL) {
    150        abort();
    151      }
             // Shallow copy: the dictionary item takes over obj.val.
    152      obj_di->di_tv = obj.val;
    153    } else {
             // Special map: store the entry as a [key, value] list in _VAL.
    154      list_T *const kv_pair = tv_list_alloc(2);
    155      tv_list_append_list(last_container.special_val, kv_pair);
    156      tv_list_append_owned_tv(kv_pair, key.val);
    157      tv_list_append_owned_tv(kv_pair, obj.val);
    158    }
    159  } else {
    160    // Object with key only
    161    if (!obj.is_special_string && obj.val.v_type != VAR_STRING) {
    162      semsg(_("E474: Expected string key: %s"), *pp);
    163      tv_clear(&obj.val);
    164      return FAIL;
    165    } else if (!obj.didcomma
    166               && (last_container.special_val == NULL
    167                   && (DICT_LEN(last_container.container.vval.v_dict) != 0))) {
    168      semsg(_("E474: Expected comma before dictionary key: %s"), val_location);
    169      tv_clear(&obj.val);
    170      return FAIL;
    171    }
    172    // Handle special dictionaries
           // A regular dictionary cannot hold a special-string key, a NULL string
           // key or a key it already contains: such a key forces the whole
           // container to be decoded again as a special map.
    173    if (last_container.special_val == NULL
    174        && (obj.is_special_string
    175            || obj.val.vval.v_string == NULL
    176            || tv_dict_find(last_container.container.vval.v_dict, obj.val.vval.v_string, -1))) {
    177      tv_clear(&obj.val);
    178 
    179      // Restart
             // Forget the container and free everything on the stack from its
             // entry upwards, then rewind to where the container started and
             // restore the comma/colon state recorded for it. The entry is copied
             // first because the loop below clears it.
    180      (void)kv_pop(*container_stack);
    181      ValuesStackItem last_container_val =
    182        kv_A(*stack, last_container.stack_index);
    183      while (kv_size(*stack) > last_container.stack_index) {
    184        tv_clear(&(kv_pop(*stack).val));
    185      }
    186      *pp = last_container.s;
    187      *didcomma = last_container_val.didcomma;
    188      *didcolon = last_container_val.didcolon;
    189      *next_map_special = true;
    190      return OK;
    191    }
           // Valid key: keep it on the stack until its value arrives.
    192    kv_push(*stack, obj);
    193  }
    194  return OK;
    195 }
    196 
    197 #define LENP(p, e) \
    198  ((int)((e) - (p))), (p)
    199 #define OBJ(obj_tv, is_sp_string, didcomma_, didcolon_) \
    200  ((ValuesStackItem) { \
    201    .is_special_string = (is_sp_string), \
    202    .val = (obj_tv), \
    203    .didcomma = (didcomma_), \
    204    .didcolon = (didcolon_), \
    205  })
    206 
    207 #define POP(obj_tv, is_sp_string) \
    208  do { \
    209    if (json_decoder_pop(OBJ(obj_tv, is_sp_string, *didcomma, *didcolon), \
    210                         stack, container_stack, \
    211                         &p, next_map_special, didcomma, didcolon) \
    212        == FAIL) { \
    213      goto parse_json_string_fail; \
    214    } \
    215    if (*next_map_special) { \
    216      goto parse_json_string_ret; \
    217    } \
    218  } while (0)
    219 
    220 /// Create a new special dictionary that ought to represent a MAP
    221 ///
    222 /// @param[out]  ret_tv  Address where new special dictionary is saved.
    223 /// @param[in]  len  Expected number of items to be populated before list
    224 ///                  becomes accessible from Vimscript. It is still valid to
    225 ///                  underpopulate a list, value only controls how many elements
    226 ///                  will be allocated in advance. @see ListLenSpecials.
    227 ///
    228 /// @return [allocated] list which should contain key-value pairs. Return value
    229 ///                     may be safely ignored.
    230 list_T *decode_create_map_special_dict(typval_T *const ret_tv, const ptrdiff_t len)
    231  FUNC_ATTR_NONNULL_ALL
    232 {
    233  list_T *const list = tv_list_alloc(len);
    234  tv_list_ref(list);
    235  create_special_dict(ret_tv, kMPMap, ((typval_T) {
    236    .v_type = VAR_LIST,
    237    .v_lock = VAR_UNLOCKED,
    238    .vval = { .v_list = list },
    239  }));
    240  return list;
    241 }
    242 
    243 /// Convert char* string to typval_T
    244 ///
    245 /// Depending on whether string has (no) NUL bytes, it may use a special
    246 /// dictionary, VAR_BLOB, or decode string to VAR_STRING.
    247 ///
    248 /// @param[in]  s  String to decode.
    249 /// @param[in]  len  String length.
    250 /// @param[in]  force_blob  whether string always should be decoded as a blob,
    251 ///                         or only when embedded NUL bytes were present
    252 /// @param[in]  s_allocated  If true, then `s` was allocated and can be saved in
    253 ///                          a returned structure. If it is not saved there, it
    254 ///                          will be freed.
    255 ///
    256 /// @return Decoded string.
    257 typval_T decode_string(const char *const s, const size_t len, bool force_blob,
    258                       const bool s_allocated)
    259  FUNC_ATTR_WARN_UNUSED_RESULT
    260 {
    261  assert(s != NULL || len == 0);
    262  const bool use_blob = force_blob || ((s != NULL) && (memchr(s, NUL, len) != NULL));
    263  if (use_blob) {
    264    typval_T tv;
    265    tv.v_lock = VAR_UNLOCKED;
    266    blob_T *b = tv_blob_alloc_ret(&tv);
    267    if (s_allocated) {
    268      b->bv_ga.ga_data = (void *)s;
    269      b->bv_ga.ga_len = (int)len;
    270      b->bv_ga.ga_maxlen = (int)len;
    271    } else {
    272      ga_concat_len(&b->bv_ga, s, len);
    273    }
    274    return tv;
    275  }
    276  return (typval_T) {
    277    .v_type = VAR_STRING,
    278    .v_lock = VAR_UNLOCKED,
    279    .vval = { .v_string = ((s == NULL || s_allocated) ? (char *)s : xmemdupz(s, len)) },
    280  };
    281 }
    282 
    283 /// Parse JSON double-quoted string
    284 ///
        /// Works in two passes. The first pass validates the literal and computes
        /// an upper bound of the decoded length; the second pass decodes escape
        /// sequences into a freshly allocated buffer. The result is converted
        /// with decode_string() and handed to json_decoder_pop().
        ///
    285 /// @param[in]  buf  Buffer being converted.
    286 /// @param[in]  buf_len  Length of the buffer.
    287 /// @param[in,out]  pp  Pointer to the start of the string. Must point to '"'.
    288 ///                     Is advanced to the closing '"'. Also see
    289 ///                     json_decoder_pop(), it may set pp to another location
    290 ///                     and alter next_map_special, didcomma and didcolon.
    291 /// @param[out]  stack  Object stack.
    292 /// @param[out]  container_stack  Container objects stack.
    293 /// @param[out]  next_map_special  Is set to true when dictionary is converted
    294 ///                                to a special map, otherwise not touched.
    295 /// @param[out]  didcomma  True if previous token was comma. Is set to recorded
    296 ///                        value when decoder is restarted, otherwise unused.
    297 /// @param[out]  didcolon  True if previous token was colon. Is set to recorded
    298 ///                        value when decoder is restarted, otherwise unused.
    299 ///
    300 /// @return OK in case of success, FAIL in case of error.
    301 static inline int parse_json_string(const char *const buf, const size_t buf_len,
    302                                    const char **const pp, ValuesStack *const stack,
    303                                    ContainerStack *const container_stack,
    304                                    bool *const next_map_special, bool *const didcomma,
    305                                    bool *const didcolon)
    306  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_ALWAYS_INLINE
    307 {
    308  const char *const e = buf + buf_len;
    309  const char *p = *pp;
         // Upper bound of the decoded length in bytes, accumulated by pass one.
    310  size_t len = 0;
         // Skip the opening quote; s is the first byte of the string contents.
    311  const char *const s = ++p;
    312  int ret = OK;
         // Pass one: validate up to the closing quote and size the output.
    313  while (p < e && *p != '"') {
    314    if (*p == '\\') {
    315      p++;
    316      if (p == e) {
    317        semsg(_("E474: Unfinished escape sequence: %.*s"),
    318              (int)buf_len, buf);
    319        goto parse_json_string_fail;
    320      }
    321      switch (*p) {
    322      case 'u':
               // p is at 'u': the four hex digits p[1]..p[4] must lie inside the
               // buffer.
    323        if (p + 4 >= e) {
    324          semsg(_("E474: Unfinished unicode escape sequence: %.*s"),
    325                (int)buf_len, buf);
    326          goto parse_json_string_fail;
    327        } else if (!ascii_isxdigit(p[1])
    328                   || !ascii_isxdigit(p[2])
    329                   || !ascii_isxdigit(p[3])
    330                   || !ascii_isxdigit(p[4])) {
    331          semsg(_("E474: Expected four hex digits after \\u: %.*s"),
    332                LENP(p - 1, e));
    333          goto parse_json_string_fail;
    334        }
    335        // One UTF-8 character below U+10000 can take up to 3 bytes,
    336        // above up to 6, but they are encoded using two \u escapes.
    337        len += 3;
               // Skip 'u' and the four hex digits.
    338        p += 5;
    339        break;
    340      case '\\':
    341      case '/':
    342      case '"':
    343      case 't':
    344      case 'b':
    345      case 'n':
    346      case 'r':
    347      case 'f':
               // Single-character escapes decode to exactly one byte.
    348        len++;
    349        p++;
    350        break;
    351      default:
    352        semsg(_("E474: Unknown escape sequence: %.*s"), LENP(p - 1, e));
    353        goto parse_json_string_fail;
    354      }
    355    } else {
    356      uint8_t p_byte = (uint8_t)(*p);
    357      // unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
    358      if (p_byte < 0x20) {
    359        semsg(_("E474: ASCII control characters cannot be present "
    360                "inside string: %.*s"), LENP(p, e));
    361        goto parse_json_string_fail;
    362      }
    363      const int ch = utf_ptr2char(p);
    364      // All characters above U+007F are encoded using two or more bytes
    365      // and thus cannot possibly be equal to *p. But utf_ptr2char({0xFF,
    366      // 0}) will return 0xFF, even though 0xFF cannot start any UTF-8
    367      // code point at all.
    368      //
    369      // The only exception is U+00C3 which is represented as 0xC3 0x83.
    370      if (ch >= 0x80 && p_byte == ch
    371          && !(ch == 0xC3 && p + 1 < e && (uint8_t)p[1] == 0x83)) {
    372        semsg(_("E474: Only UTF-8 strings allowed: %.*s"), LENP(p, e));
    373        goto parse_json_string_fail;
    374      } else if (ch > 0x10FFFF) {
    375        semsg(_("E474: Only UTF-8 code points up to U+10FFFF "
    376                "are allowed to appear unescaped: %.*s"), LENP(p, e));
    377        goto parse_json_string_fail;
    378      }
             // Unescaped characters are copied verbatim by pass two, so they
             // contribute their own encoded length.
    379      const size_t ch_len = (size_t)utf_char2len(ch);
    380      assert(ch_len == (size_t)(ch ? utf_ptr2len(p) : 1));
    381      len += ch_len;
    382      p += ch_len;
    383    }
    384  }
         // The loop stops either at the closing quote or at the end of the buffer.
    385  if (p == e || *p != '"') {
    386    semsg(_("E474: Expected string end: %.*s"), (int)buf_len, buf);
    387    goto parse_json_string_fail;
    388  }
         // Pass two: decode into a buffer of len bytes plus the terminating NUL.
    389  char *str = xmalloc(len + 1);
         // High surrogate read from a \u escape and not yet written out; 0 if none.
    390  int fst_in_pair = 0;
    391  char *str_end = str;
        // Write out a pending high surrogate that turned out to be unpaired.
    392 #define PUT_FST_IN_PAIR(fst_in_pair, str_end) \
    393  do { \
    394    if ((fst_in_pair) != 0) { \
    395      (str_end) += utf_char2bytes(fst_in_pair, (str_end)); \
    396      (fst_in_pair) = 0; \
    397    } \
    398  } while (0)
    399  for (const char *t = s; t < p; t++) {
           // Only another \u escape can complete a surrogate pair; anything else
           // flushes the pending high surrogate first.
    400    if (t[0] != '\\' || t[1] != 'u') {
    401      PUT_FST_IN_PAIR(fst_in_pair, str_end);
    402    }
    403    if (*t == '\\') {
    404      t++;
    405      switch (*t) {
    406      case 'u': {
    407        const char ubuf[] = { t[1], t[2], t[3], t[4] };
               // Leave t on the last hex digit; the loop increment moves past it.
    408        t += 4;
    409        uvarnumber_T ch;
    410        vim_str2nr(ubuf, NULL, NULL,
    411                   STR2NR_HEX | STR2NR_FORCE, NULL, &ch, 4, true, NULL);
    412        if (SURROGATE_HI_START <= ch && ch <= SURROGATE_HI_END) {
                 // New high surrogate: flush a previous one, remember this one.
    413          PUT_FST_IN_PAIR(fst_in_pair, str_end);
    414          fst_in_pair = (int)ch;
    415        } else if (SURROGATE_LO_START <= ch && ch <= SURROGATE_LO_END
    416                   && fst_in_pair != 0) {
                 // Low surrogate completing a pair: combine both halves into one
                 // code point.
    417          const int full_char = ((int)(ch - SURROGATE_LO_START)
    418                                 + ((fst_in_pair - SURROGATE_HI_START) << 10)
    419                                 + SURROGATE_FIRST_CHAR);
    420          str_end += utf_char2bytes(full_char, str_end);
    421          fst_in_pair = 0;
    422        } else {
    423          PUT_FST_IN_PAIR(fst_in_pair, str_end);
    424          str_end += utf_char2bytes((int)ch, str_end);
    425        }
    426        break;
    427      }
    428      case '\\':
    429      case '/':
    430      case '"':
    431      case 't':
    432      case 'b':
    433      case 'n':
    434      case 'r':
    435      case 'f': {
               // Lookup table indexed by the escape character itself.
    436        static const char escapes[] = {
    437          ['\\'] = '\\',
    438          ['/'] = '/',
    439          ['"'] = '"',
    440          ['t'] = TAB,
    441          ['b'] = BS,
    442          ['n'] = NL,
    443          ['r'] = CAR,
    444          ['f'] = FF,
    445        };
    446        *str_end++ = escapes[(int)(*t)];
    447        break;
    448      }
    449      default:
               // Pass one rejected every other escape character.
    450        abort();
    451      }
    452    } else {
    453      *str_end++ = *t;
    454    }
    455  }
    456  PUT_FST_IN_PAIR(fst_in_pair, str_end);
    457 #undef PUT_FST_IN_PAIR
    458  *str_end = NUL;
         // str is passed as an allocated buffer (s_allocated is true).
    459  typval_T obj = decode_string(str, (size_t)(str_end - str), false, true);
         // Anything other than a plain VAR_STRING is flagged as a special string.
    460  POP(obj, obj.v_type != VAR_STRING);
    461  goto parse_json_string_ret;
    462 parse_json_string_fail:
    463  ret = FAIL;
    464 parse_json_string_ret:
         // Report the final position on success, failure and restart alike.
    465  *pp = p;
    466  return ret;
    467 }
    468 
    469 #undef POP
    470 
    471 /// Parse JSON number: both floating-point and integer
    472 ///
    473 /// Number format: `-?\d+(?:.\d+)?(?:[eE][+-]?\d+)?`.
    474 ///
    475 /// @param[in]  buf  Buffer being converted.
    476 /// @param[in]  buf_len  Length of the buffer.
    477 /// @param[in,out]  pp  Pointer to the start of the number. Must point to
    478 ///                     a digit or a minus sign. Is advanced to the last
    479 ///                     character of the number. Also see json_decoder_pop(), it
    480 ///                     may set pp to another location and alter
    481 ///                     next_map_special, didcomma and didcolon.
    482 /// @param[out]  stack  Object stack.
    483 /// @param[out]  container_stack  Container objects stack.
    484 /// @param[out]  next_map_special  Is set to true when dictionary is converted
    485 ///                                to a special map, otherwise not touched.
    486 /// @param[out]  didcomma  True if previous token was comma. Is set to recorded
    487 ///                        value when decoder is restarted, otherwise unused.
    488 /// @param[out]  didcolon  True if previous token was colon. Is set to recorded
    489 ///                        value when decoder is restarted, otherwise unused.
    490 ///
    491 /// @return OK in case of success, FAIL in case of error.
    492 static inline int parse_json_number(const char *const buf, const size_t buf_len,
    493                                    const char **const pp, ValuesStack *const stack,
    494                                    ContainerStack *const container_stack,
    495                                    bool *const next_map_special, bool *const didcomma,
    496                                    bool *const didcolon)
    497  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_ALWAYS_INLINE
    498 {
    499  const char *const e = buf + buf_len;
    500  const char *p = *pp;
    501  int ret = OK;
    502  const char *const s = p;
    503  const char *ints = NULL;
    504  const char *fracs = NULL;
    505  const char *exps = NULL;
    506  const char *exps_s = NULL;
    507  if (*p == '-') {
    508    p++;
    509  }
    510  ints = p;
    511  if (p >= e) {
    512    goto parse_json_number_check;
    513  }
    514  while (p < e && ascii_isdigit(*p)) {
    515    p++;
    516  }
    517  if (p != ints + 1 && *ints == '0') {
    518    semsg(_("E474: Leading zeroes are not allowed: %.*s"), LENP(s, e));
    519    goto parse_json_number_fail;
    520  }
    521  if (p >= e || p == ints) {
    522    goto parse_json_number_check;
    523  }
    524  if (*p == '.') {
    525    p++;
    526    fracs = p;
    527    while (p < e && ascii_isdigit(*p)) {
    528      p++;
    529    }
    530    if (p >= e || p == fracs) {
    531      goto parse_json_number_check;
    532    }
    533  }
    534  if (*p == 'e' || *p == 'E') {
    535    p++;
    536    exps_s = p;
    537    if (p < e && (*p == '-' || *p == '+')) {
    538      p++;
    539    }
    540    exps = p;
    541    while (p < e && ascii_isdigit(*p)) {
    542      p++;
    543    }
    544  }
    545 parse_json_number_check:
    546  if (p == ints) {
    547    semsg(_("E474: Missing number after minus sign: %.*s"), LENP(s, e));
    548    goto parse_json_number_fail;
    549  } else if (p == fracs || (fracs != NULL && exps_s == fracs + 1)) {
    550    semsg(_("E474: Missing number after decimal dot: %.*s"), LENP(s, e));
    551    goto parse_json_number_fail;
    552  } else if (p == exps) {
    553    semsg(_("E474: Missing exponent: %.*s"), LENP(s, e));
    554    goto parse_json_number_fail;
    555  }
    556  typval_T tv = {
    557    .v_type = VAR_NUMBER,
    558    .v_lock = VAR_UNLOCKED,
    559  };
    560  const size_t exp_num_len = (size_t)(p - s);
    561  if (fracs || exps) {
    562    // Convert floating-point number
    563    const size_t num_len = string2float(s, &tv.vval.v_float);
    564    if (exp_num_len != num_len) {
    565      semsg(_("E685: internal error: while converting number \"%.*s\" "
    566              "to float string2float consumed %zu bytes in place of %zu"),
    567            (int)exp_num_len, s, num_len, exp_num_len);
    568    }
    569    tv.v_type = VAR_FLOAT;
    570  } else {
    571    // Convert integer
    572    varnumber_T nr;
    573    int num_len;
    574    vim_str2nr(s, NULL, &num_len, 0, &nr, NULL, (int)(p - s), true, NULL);
    575    if ((int)exp_num_len != num_len) {
    576      semsg(_("E685: internal error: while converting number \"%.*s\" "
    577              "to integer vim_str2nr consumed %i bytes in place of %zu"),
    578            (int)exp_num_len, s, num_len, exp_num_len);
    579    }
    580    tv.vval.v_number = nr;
    581  }
    582  if (json_decoder_pop(OBJ(tv, false, *didcomma, *didcolon),
    583                       stack, container_stack,
    584                       &p, next_map_special, didcomma, didcolon) == FAIL) {
    585    goto parse_json_number_fail;
    586  }
    587  if (*next_map_special) {
    588    goto parse_json_number_ret;
    589  }
    590  p--;
    591  goto parse_json_number_ret;
    592 parse_json_number_fail:
    593  ret = FAIL;
    594 parse_json_number_ret:
    595  *pp = p;
    596  return ret;
    597 }
    598 
    599 #define POP(obj_tv, is_sp_string) \
    600  do { \
    601    if (json_decoder_pop(OBJ(obj_tv, is_sp_string, didcomma, didcolon), \
    602                         &stack, &container_stack, \
    603                         &p, &next_map_special, &didcomma, &didcolon) \
    604        == FAIL) { \
    605      goto json_decode_string_fail; \
    606    } \
    607    if (next_map_special) { \
    608      goto json_decode_string_cycle_start; \
    609    } \
    610  } while (0)
    611 
    612 /// Convert JSON string into Vimscript object
    613 ///
    614 /// @param[in]  buf  String to convert. UTF-8 encoding is assumed.
    615 /// @param[in]  buf_len  Length of the string.
    616 /// @param[out]  rettv  Location where to save results.
    617 ///
    618 /// @return OK in case of success, FAIL otherwise.
    619 int json_decode_string(const char *const buf, const size_t buf_len, typval_T *const rettv)
    620  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT
    621 {
    622  const char *p = buf;
    623  const char *const e = buf + buf_len;
    624  while (p < e && (*p == ' ' || *p == TAB || *p == NL || *p == CAR)) {
    625    p++;
    626  }
    627  if (p == e) {
    628    emsg(_("E474: Attempt to decode a blank string"));
    629    return FAIL;
    630  }
    631  int ret = OK;
    632  ValuesStack stack = KV_INITIAL_VALUE;
    633  ContainerStack container_stack = KV_INITIAL_VALUE;
    634  rettv->v_type = VAR_UNKNOWN;
    635  bool didcomma = false;
    636  bool didcolon = false;
    637  bool next_map_special = false;
    638  for (; p < e; p++) {
    639 json_decode_string_cycle_start:
    640    assert(*p == '{' || next_map_special == false);
    641    switch (*p) {
    642    case '}':
    643    case ']': {
    644      if (kv_size(container_stack) == 0) {
    645        semsg(_("E474: No container to close: %.*s"), LENP(p, e));
    646        goto json_decode_string_fail;
    647      }
    648      ContainerStackItem last_container = kv_last(container_stack);
    649      if (*p == '}' && last_container.container.v_type != VAR_DICT) {
    650        semsg(_("E474: Closing list with curly bracket: %.*s"), LENP(p, e));
    651        goto json_decode_string_fail;
    652      } else if (*p == ']' && last_container.container.v_type != VAR_LIST) {
    653        semsg(_("E474: Closing dictionary with square bracket: %.*s"),
    654              LENP(p, e));
    655        goto json_decode_string_fail;
    656      } else if (didcomma) {
    657        semsg(_("E474: Trailing comma: %.*s"), LENP(p, e));
    658        goto json_decode_string_fail;
    659      } else if (didcolon) {
    660        semsg(_("E474: Expected value after colon: %.*s"), LENP(p, e));
    661        goto json_decode_string_fail;
    662      } else if (last_container.stack_index != kv_size(stack) - 1) {
    663        assert(last_container.stack_index < kv_size(stack) - 1);
    664        semsg(_("E474: Expected value: %.*s"), LENP(p, e));
    665        goto json_decode_string_fail;
    666      }
    667      if (kv_size(stack) == 1) {
    668        p++;
    669        (void)kv_pop(container_stack);
    670        goto json_decode_string_after_cycle;
    671      } else {
    672        if (json_decoder_pop(kv_pop(stack), &stack, &container_stack, &p,
    673                             &next_map_special, &didcomma, &didcolon)
    674            == FAIL) {
    675          goto json_decode_string_fail;
    676        }
    677        assert(!next_map_special);
    678        break;
    679      }
    680    }
    681    case ',': {
    682      if (kv_size(container_stack) == 0) {
    683        semsg(_("E474: Comma not inside container: %.*s"), LENP(p, e));
    684        goto json_decode_string_fail;
    685      }
    686      ContainerStackItem last_container = kv_last(container_stack);
    687      if (didcomma) {
    688        semsg(_("E474: Duplicate comma: %.*s"), LENP(p, e));
    689        goto json_decode_string_fail;
    690      } else if (didcolon) {
    691        semsg(_("E474: Comma after colon: %.*s"), LENP(p, e));
    692        goto json_decode_string_fail;
    693      } else if (last_container.container.v_type == VAR_DICT
    694                 && last_container.stack_index != kv_size(stack) - 1) {
    695        semsg(_("E474: Using comma in place of colon: %.*s"), LENP(p, e));
    696        goto json_decode_string_fail;
    697      } else if (last_container.special_val == NULL
    698                 ? (last_container.container.v_type == VAR_DICT
    699                    ? (DICT_LEN(last_container.container.vval.v_dict) == 0)
    700                    : (tv_list_len(last_container.container.vval.v_list)
    701                       == 0))
    702                 : (tv_list_len(last_container.special_val) == 0)) {
    703        semsg(_("E474: Leading comma: %.*s"), LENP(p, e));
    704        goto json_decode_string_fail;
    705      }
    706      didcomma = true;
    707      continue;
    708    }
    709    case ':': {
    710      if (kv_size(container_stack) == 0) {
    711        semsg(_("E474: Colon not inside container: %.*s"), LENP(p, e));
    712        goto json_decode_string_fail;
    713      }
    714      ContainerStackItem last_container = kv_last(container_stack);
    715      if (last_container.container.v_type != VAR_DICT) {
    716        semsg(_("E474: Using colon not in dictionary: %.*s"), LENP(p, e));
    717        goto json_decode_string_fail;
    718      } else if (last_container.stack_index != kv_size(stack) - 2) {
    719        semsg(_("E474: Unexpected colon: %.*s"), LENP(p, e));
    720        goto json_decode_string_fail;
    721      } else if (didcomma) {
    722        semsg(_("E474: Colon after comma: %.*s"), LENP(p, e));
    723        goto json_decode_string_fail;
    724      } else if (didcolon) {
    725        semsg(_("E474: Duplicate colon: %.*s"), LENP(p, e));
    726        goto json_decode_string_fail;
    727      }
    728      didcolon = true;
    729      continue;
    730    }
    731    case ' ':
    732    case TAB:
    733    case NL:
    734    case CAR:
    735      continue;
    736    case 'n':
    737      if ((p + 3) >= e || strncmp(p + 1, "ull", 3) != 0) {
    738        semsg(_("E474: Expected null: %.*s"), LENP(p, e));
    739        goto json_decode_string_fail;
    740      }
    741      p += 3;
    742      POP(((typval_T) {
    743        .v_type = VAR_SPECIAL,
    744        .v_lock = VAR_UNLOCKED,
    745        .vval = { .v_special = kSpecialVarNull },
    746      }), false);
    747      break;
    748    case 't':
    749      if ((p + 3) >= e || strncmp(p + 1, "rue", 3) != 0) {
    750        semsg(_("E474: Expected true: %.*s"), LENP(p, e));
    751        goto json_decode_string_fail;
    752      }
    753      p += 3;
    754      POP(((typval_T) {
    755        .v_type = VAR_BOOL,
    756        .v_lock = VAR_UNLOCKED,
    757        .vval = { .v_bool = kBoolVarTrue },
    758      }), false);
    759      break;
    760    case 'f':
    761      if ((p + 4) >= e || strncmp(p + 1, "alse", 4) != 0) {
    762        semsg(_("E474: Expected false: %.*s"), LENP(p, e));
    763        goto json_decode_string_fail;
    764      }
    765      p += 4;
    766      POP(((typval_T) {
    767        .v_type = VAR_BOOL,
    768        .v_lock = VAR_UNLOCKED,
    769        .vval = { .v_bool = kBoolVarFalse },
    770      }), false);
    771      break;
    772    case '"':
    773      if (parse_json_string(buf, buf_len, &p, &stack, &container_stack,
    774                            &next_map_special, &didcomma, &didcolon)
    775          == FAIL) {
    776        // Error message was already given
    777        goto json_decode_string_fail;
    778      }
    779      if (next_map_special) {
    780        goto json_decode_string_cycle_start;
    781      }
    782      break;
    783    case '-':
    784    case '0':
    785    case '1':
    786    case '2':
    787    case '3':
    788    case '4':
    789    case '5':
    790    case '6':
    791    case '7':
    792    case '8':
    793    case '9':
    794      if (parse_json_number(buf, buf_len, &p, &stack, &container_stack,
    795                            &next_map_special, &didcomma, &didcolon)
    796          == FAIL) {
    797        // Error message was already given
    798        goto json_decode_string_fail;
    799      }
    800      if (next_map_special) {
    801        goto json_decode_string_cycle_start;
    802      }
    803      break;
    804    case '[': {
    805      list_T *list = tv_list_alloc(kListLenMayKnow);
    806      tv_list_ref(list);
    807      typval_T tv = {
    808        .v_type = VAR_LIST,
    809        .v_lock = VAR_UNLOCKED,
    810        .vval = { .v_list = list },
    811      };
    812      kv_push(container_stack, ((ContainerStackItem) { .stack_index = kv_size(stack),
    813                                                       .s = p,
    814                                                       .container = tv,
    815                                                       .special_val = NULL }));
    816      kv_push(stack, OBJ(tv, false, didcomma, didcolon));
    817      break;
    818    }
    819    case '{': {
    820      typval_T tv;
    821      list_T *val_list = NULL;
    822      if (next_map_special) {
    823        next_map_special = false;
    824        val_list = decode_create_map_special_dict(&tv, kListLenMayKnow);
    825      } else {
    826        dict_T *dict = tv_dict_alloc();
    827        dict->dv_refcount++;
    828        tv = (typval_T) {
    829          .v_type = VAR_DICT,
    830          .v_lock = VAR_UNLOCKED,
    831          .vval = { .v_dict = dict },
    832        };
    833      }
    834      kv_push(container_stack, ((ContainerStackItem) { .stack_index = kv_size(stack),
    835                                                       .s = p,
    836                                                       .container = tv,
    837                                                       .special_val = val_list }));
    838      kv_push(stack, OBJ(tv, false, didcomma, didcolon));
    839      break;
    840    }
    841    default:
    842      semsg(_("E474: Unidentified byte: %.*s"), LENP(p, e));
    843      goto json_decode_string_fail;
    844    }
    845    didcomma = false;
    846    didcolon = false;
    847    if (kv_size(container_stack) == 0) {
    848      p++;
    849      break;
    850    }
    851  }
    852 json_decode_string_after_cycle:
    853  for (; p < e; p++) {
    854    switch (*p) {
    855    case NL:
    856    case ' ':
    857    case TAB:
    858    case CAR:
    859      break;
    860    default:
    861      semsg(_("E474: Trailing characters: %.*s"), LENP(p, e));
    862      goto json_decode_string_fail;
    863    }
    864  }
    865  if (kv_size(stack) == 1 && kv_size(container_stack) == 0) {
    866    *rettv = kv_pop(stack).val;
    867    goto json_decode_string_ret;
    868  }
    869  semsg(_("E474: Unexpected end of input: %.*s"), (int)buf_len, buf);
    870 json_decode_string_fail:
    871  ret = FAIL;
    872  while (kv_size(stack)) {
    873    tv_clear(&(kv_pop(stack).val));
    874  }
    875 json_decode_string_ret:
    876  kv_destroy(stack);
    877  kv_destroy(container_stack);
    878  return ret;
    879 }
    880 
    881 #undef LENP
    882 #undef POP
    883 
    884 #undef OBJ
    885 
    886 #undef DICT_LEN
    887 
    888 static void positive_integer_to_special_typval(typval_T *rettv, uint64_t val)
    889 {
    890  if (val <= VARNUMBER_MAX) {
    891    *rettv = (typval_T) {
    892      .v_type = VAR_NUMBER,
    893      .v_lock = VAR_UNLOCKED,
    894      .vval = { .v_number = (varnumber_T)val },
    895    };
    896  } else {
    897    list_T *const list = tv_list_alloc(4);
    898    tv_list_ref(list);
    899    create_special_dict(rettv, kMPInteger, ((typval_T) {
    900      .v_type = VAR_LIST,
    901      .v_lock = VAR_UNLOCKED,
    902      .vval = { .v_list = list },
    903    }));
    904    tv_list_append_number(list, 1);
    905    tv_list_append_number(list, (varnumber_T)((val >> 62) & 0x3));
    906    tv_list_append_number(list, (varnumber_T)((val >> 31) & 0x7FFFFFFF));
    907    tv_list_append_number(list, (varnumber_T)(val & 0x7FFFFFFF));
    908  }
    909 }
    910 
    911 static void typval_parse_enter(mpack_parser_t *parser, mpack_node_t *node)
    912 {
    913  typval_T *result = NULL;
    914 
    915  mpack_node_t *parent = MPACK_PARENT_NODE(node);
    916  if (parent) {
    917    switch (parent->tok.type) {
    918    case MPACK_TOKEN_ARRAY: {
    919      list_T *list = parent->data[1].p;
    920      result = tv_list_append_owned_tv(list, (typval_T) { .v_type = VAR_UNKNOWN });
    921      break;
    922    }
    923    case MPACK_TOKEN_MAP: {
    924      typval_T(*items)[2] = parent->data[1].p;
    925      result = &items[parent->pos][parent->key_visited];
    926      break;
    927    }
    928 
    929    case MPACK_TOKEN_STR:
    930    case MPACK_TOKEN_BIN:
    931    case MPACK_TOKEN_EXT:
    932      assert(node->tok.type == MPACK_TOKEN_CHUNK);
    933      break;
    934 
    935    default:
    936      abort();
    937    }
    938  } else {
    939    result = parser->data.p;
    940  }
    941 
    942  // for types that are completed in typval_parse_exit
    943  node->data[0].p = result;
    944  node->data[1].p = NULL;  // free on error if non-NULL
    945 
    946  switch (node->tok.type) {
    947  case MPACK_TOKEN_NIL:
    948    *result = (typval_T) {
    949      .v_type = VAR_SPECIAL,
    950      .v_lock = VAR_UNLOCKED,
    951      .vval = { .v_special = kSpecialVarNull },
    952    };
    953    break;
    954  case MPACK_TOKEN_BOOLEAN:
    955    *result = (typval_T) {
    956      .v_type = VAR_BOOL,
    957      .v_lock = VAR_UNLOCKED,
    958      .vval = {
    959        .v_bool = mpack_unpack_boolean(node->tok) ? kBoolVarTrue : kBoolVarFalse
    960      },
    961    };
    962    break;
    963  case MPACK_TOKEN_SINT: {
    964    *result = (typval_T) {
    965      .v_type = VAR_NUMBER,
    966      .v_lock = VAR_UNLOCKED,
    967      .vval = { .v_number = mpack_unpack_sint(node->tok) },
    968    };
    969    break;
    970  }
    971  case MPACK_TOKEN_UINT:
    972    positive_integer_to_special_typval(result, mpack_unpack_uint(node->tok));
    973    break;
    974  case MPACK_TOKEN_FLOAT:
    975    *result = (typval_T) {
    976      .v_type = VAR_FLOAT,
    977      .v_lock = VAR_UNLOCKED,
    978      .vval = { .v_float = mpack_unpack_float(node->tok) },
    979    };
    980    break;
    981 
    982  case MPACK_TOKEN_BIN:
    983  case MPACK_TOKEN_STR:
    984  case MPACK_TOKEN_EXT:
    985    // actually converted in typval_parse_exit after the data chunks
    986    node->data[1].p = xmallocz(node->tok.length);
    987    break;
    988  case MPACK_TOKEN_CHUNK: {
    989    char *data = parent->data[1].p;
    990    memcpy(data + parent->pos,
    991           node->tok.data.chunk_ptr, node->tok.length);
    992    break;
    993  }
    994 
    995  case MPACK_TOKEN_ARRAY: {
    996    list_T *const list = tv_list_alloc((ptrdiff_t)node->tok.length);
    997    tv_list_ref(list);
    998    *result = (typval_T) {
    999      .v_type = VAR_LIST,
   1000      .v_lock = VAR_UNLOCKED,
   1001      .vval = { .v_list = list },
   1002    };
   1003    node->data[1].p = list;
   1004    break;
   1005  }
   1006  case MPACK_TOKEN_MAP:
   1007    // we don't know if this will be safe to convert to a typval dict yet
   1008    node->data[1].p = xmallocz(node->tok.length * 2 * sizeof(typval_T));
   1009    break;
   1010  }
   1011 }
   1012 
   1013 /// Free node which was entered but never exited, due to a nested error
   1014 ///
   1015 /// Don't bother with typvals as these will be GC:d eventually
   1016 void typval_parser_error_free(mpack_parser_t *parser)
   1017 {
   1018  for (uint32_t i = 0; i < parser->size; i++) {
   1019    mpack_node_t *node = &parser->items[i];
   1020    switch (node->tok.type) {
   1021    case MPACK_TOKEN_BIN:
   1022    case MPACK_TOKEN_STR:
   1023    case MPACK_TOKEN_EXT:
   1024    case MPACK_TOKEN_MAP:
   1025      XFREE_CLEAR(node->data[1].p);
   1026      break;
   1027    default:
   1028      break;
   1029    }
   1030  }
   1031 }
   1032 
   1033 static void typval_parse_exit(mpack_parser_t *parser, mpack_node_t *node)
   1034 {
   1035  typval_T *result = node->data[0].p;
   1036  switch (node->tok.type) {
   1037  case MPACK_TOKEN_BIN:
   1038  case MPACK_TOKEN_STR:
   1039    *result = decode_string(node->data[1].p, node->tok.length, false, true);
   1040    node->data[1].p = NULL;
   1041    break;
   1042 
   1043  case MPACK_TOKEN_EXT: {
   1044    list_T *const list = tv_list_alloc(2);
   1045    tv_list_ref(list);
   1046    tv_list_append_number(list, node->tok.data.ext_type);
   1047    list_T *const ext_val_list = tv_list_alloc(kListLenMayKnow);
   1048    tv_list_append_list(list, ext_val_list);
   1049    create_special_dict(result, kMPExt, ((typval_T) { .v_type = VAR_LIST,
   1050                                                      .v_lock = VAR_UNLOCKED,
   1051                                                      .vval = { .v_list = list } }));
   1052    // TODO(bfredl): why not use BLOB?
   1053    encode_list_write((void *)ext_val_list, node->data[1].p, node->tok.length);
   1054    XFREE_CLEAR(node->data[1].p);
   1055  }
   1056  break;
   1057 
   1058  case MPACK_TOKEN_MAP: {
   1059    typval_T(*items)[2] = node->data[1].p;
   1060    for (size_t i = 0; i < node->tok.length; i++) {
   1061      typval_T *key = &items[i][0];
   1062      if (key->v_type != VAR_STRING
   1063          || key->vval.v_string == NULL
   1064          || key->vval.v_string[0] == NUL) {
   1065        goto msgpack_to_vim_generic_map;
   1066      }
   1067    }
   1068    dict_T *const dict = tv_dict_alloc();
   1069    dict->dv_refcount++;
   1070    *result = (typval_T) {
   1071      .v_type = VAR_DICT,
   1072      .v_lock = VAR_UNLOCKED,
   1073      .vval = { .v_dict = dict },
   1074    };
   1075    for (size_t i = 0; i < node->tok.length; i++) {
   1076      char *key = items[i][0].vval.v_string;
   1077      size_t keylen = strlen(key);
   1078      dictitem_T *const di = xmallocz(offsetof(dictitem_T, di_key) + keylen);
   1079      memcpy(&di->di_key[0], key, keylen);
   1080      di->di_tv.v_type = VAR_UNKNOWN;
   1081      if (tv_dict_add(dict, di) == FAIL) {
   1082        // Duplicate key: fallback to generic map
   1083        TV_DICT_ITER(dict, d, {
   1084            d->di_tv.v_type = VAR_SPECIAL;  // don't free values in tv_clear(), they will be reused
   1085            d->di_tv.vval.v_special = kSpecialVarNull;
   1086          });
   1087        tv_clear(result);
   1088        xfree(di);
   1089        goto msgpack_to_vim_generic_map;
   1090      }
   1091      di->di_tv = items[i][1];
   1092    }
   1093    for (size_t i = 0; i < node->tok.length; i++) {
   1094      xfree(items[i][0].vval.v_string);
   1095    }
   1096    XFREE_CLEAR(node->data[1].p);
   1097    break;
   1098 msgpack_to_vim_generic_map: {}
   1099    list_T *const list = decode_create_map_special_dict(result, node->tok.length);
   1100    for (size_t i = 0; i < node->tok.length; i++) {
   1101      list_T *const kv_pair = tv_list_alloc(2);
   1102      tv_list_append_list(list, kv_pair);
   1103 
   1104      tv_list_append_owned_tv(kv_pair, items[i][0]);
   1105      tv_list_append_owned_tv(kv_pair, items[i][1]);
   1106    }
   1107    XFREE_CLEAR(node->data[1].p);
   1108    break;
   1109  }
   1110 
   1111  default:
   1112    // other kinds are handled completely in typval_parse_enter
   1113    break;
   1114  }
   1115 }
   1116 
   1117 int mpack_parse_typval(mpack_parser_t *parser, const char **data, size_t *size)
   1118 {
   1119  return mpack_parse(parser, data, size, typval_parse_enter, typval_parse_exit);
   1120 }
   1121 
   1122 int unpack_typval(const char **data, size_t *size, typval_T *ret)
   1123 {
   1124  ret->v_type = VAR_UNKNOWN;
   1125  mpack_parser_t parser;
   1126  mpack_parser_init(&parser, 0);
   1127  parser.data.p = ret;
   1128  int status = mpack_parse_typval(&parser, data, size);
   1129  if (status != MPACK_OK) {
   1130    typval_parser_error_free(&parser);
   1131    tv_clear(ret);
   1132  }
   1133  return status;
   1134 }