neovim

Neovim text editor
git clone https://git.dasho.dev/neovim.git
Log | Files | Refs | README

parser.c (11461B)


      1 #include <assert.h>
      2 #include <stdio.h>
      3 #include <string.h>
      4 
      5 #include "nvim/vterm/parser.h"
      6 #include "nvim/vterm/vterm.h"
      7 #include "nvim/vterm/vterm_internal_defs.h"
      8 
      9 #include "vterm/parser.c.generated.h"
     10 
     11 #undef DEBUG_PARSER
     12 
     13 static bool is_intermed(uint8_t c)
     14 {
     15  return c >= 0x20 && c <= 0x2f;
     16 }
     17 
     18 static void do_control(VTerm *vt, uint8_t control)
     19 {
     20  if (vt->parser.callbacks && vt->parser.callbacks->control) {
     21    if ((*vt->parser.callbacks->control)(control, vt->parser.cbdata)) {
     22      return;
     23    }
     24  }
     25 
     26  DEBUG_LOG("libvterm: Unhandled control 0x%02x\n", control);
     27 }
     28 
     29 static void do_csi(VTerm *vt, char command)
     30 {
     31 #ifdef DEBUG_PARSER
     32  printf("Parsed CSI args as:\n", arglen, args);
     33  printf(" leader: %s\n", vt->parser.v.csi.leader);
     34  for (int argi = 0; argi < vt->parser.v.csi.argi; argi++) {
     35    printf(" %lu", CSI_ARG(vt->parser.v.csi.args[argi]));
     36    if (!CSI_ARG_HAS_MORE(vt->parser.v.csi.args[argi])) {
     37      printf("\n");
     38    }
     39    printf(" intermed: %s\n", vt->parser.intermed);
     40  }
     41 #endif
     42 
     43  if (vt->parser.callbacks && vt->parser.callbacks->csi) {
     44    if ((*vt->parser.callbacks->csi)(vt->parser.v.csi.leaderlen ? vt->parser.v.csi.leader : NULL,
     45                                     vt->parser.v.csi.args,
     46                                     vt->parser.v.csi.argi,
     47                                     vt->parser.intermedlen ? vt->parser.intermed : NULL,
     48                                     command,
     49                                     vt->parser.cbdata)) {
     50      return;
     51    }
     52  }
     53 
     54  DEBUG_LOG("libvterm: Unhandled CSI %c\n", command);
     55 }
     56 
     57 static void do_escape(VTerm *vt, char command)
     58 {
     59  char seq[INTERMED_MAX + 1];
     60 
     61  size_t len = (size_t)vt->parser.intermedlen;
     62  strncpy(seq, vt->parser.intermed, len);  // NOLINT(runtime/printf)
     63  seq[len++] = command;
     64  seq[len] = 0;
     65 
     66  if (vt->parser.callbacks && vt->parser.callbacks->escape) {
     67    if ((*vt->parser.callbacks->escape)(seq, len, vt->parser.cbdata)) {
     68      return;
     69    }
     70  }
     71 
     72  DEBUG_LOG("libvterm: Unhandled escape ESC 0x%02x\n", command);
     73 }
     74 
     75 static void string_fragment(VTerm *vt, const char *str, size_t len, bool final,
     76                            VTermTerminator terminator)
     77 {
     78  VTermStringFragment frag = {
     79    .str = str,
     80    .len = len,
     81    .initial = vt->parser.string_initial,
     82    .final = final,
     83    .terminator = terminator,
     84  };
     85 
     86  switch (vt->parser.state) {
     87  case OSC:
     88    if (vt->parser.callbacks && vt->parser.callbacks->osc) {
     89      (*vt->parser.callbacks->osc)(vt->parser.v.osc.command, frag, vt->parser.cbdata);
     90    }
     91    break;
     92 
     93  case DCS_VTERM:
     94    if (vt->parser.callbacks && vt->parser.callbacks->dcs) {
     95      (*vt->parser.callbacks->dcs)(vt->parser.v.dcs.command, (size_t)vt->parser.v.dcs.commandlen,
     96                                   frag,
     97                                   vt->parser.cbdata);
     98    }
     99    break;
    100 
    101  case APC:
    102    if (vt->parser.callbacks && vt->parser.callbacks->apc) {
    103      (*vt->parser.callbacks->apc)(frag, vt->parser.cbdata);
    104    }
    105    break;
    106 
    107  case PM:
    108    if (vt->parser.callbacks && vt->parser.callbacks->pm) {
    109      (*vt->parser.callbacks->pm)(frag, vt->parser.cbdata);
    110    }
    111    break;
    112 
    113  case SOS:
    114    if (vt->parser.callbacks && vt->parser.callbacks->sos) {
    115      (*vt->parser.callbacks->sos)(frag, vt->parser.cbdata);
    116    }
    117    break;
    118 
    119  case NORMAL:
    120  case CSI_LEADER:
    121  case CSI_ARGS:
    122  case CSI_INTERMED:
    123  case OSC_COMMAND:
    124  case DCS_COMMAND:
    125    return;
    126  }
    127 
    128  vt->parser.string_initial = false;
    129 }
    130 
    131 size_t vterm_input_write(VTerm *vt, const char *bytes, size_t len)
    132 {
    133  size_t pos = 0;
    134  const char *string_start;
    135 
    136  switch (vt->parser.state) {
    137  case NORMAL:
    138  case CSI_LEADER:
    139  case CSI_ARGS:
    140  case CSI_INTERMED:
    141  case OSC_COMMAND:
    142  case DCS_COMMAND:
    143    string_start = NULL;
    144    break;
    145  case OSC:
    146  case DCS_VTERM:
    147  case APC:
    148  case PM:
    149  case SOS:
    150    string_start = bytes;
    151    break;
    152  }
    153 
    154 #define ENTER_STATE(st)        do { vt->parser.state = st; string_start = NULL; } while (0)
    155 #define ENTER_NORMAL_STATE()   ENTER_STATE(NORMAL)
    156 
    157 #define IS_STRING_STATE()      (vt->parser.state >= OSC_COMMAND)
    158 
    159  for (; pos < len; pos++) {
    160    uint8_t c = (uint8_t)bytes[pos];
    161    bool c1_allowed = !vt->mode.utf8;
    162 
    163    if (c == 0x00 || c == 0x7f) {  // NUL, DEL
    164      if (IS_STRING_STATE()) {
    165        string_fragment(vt, string_start, (size_t)(bytes + pos - string_start), false,
    166                        VTERM_TERMINATOR_ST);
    167        string_start = bytes + pos + 1;
    168      }
    169      if (vt->parser.emit_nul) {
    170        do_control(vt, c);
    171      }
    172      continue;
    173    }
    174    if (c == 0x18 || c == 0x1a) {  // CAN, SUB
    175      vt->parser.in_esc = false;
    176      ENTER_NORMAL_STATE();
    177      if (vt->parser.emit_nul) {
    178        do_control(vt, c);
    179      }
    180      continue;
    181    } else if (c == 0x1b) {  // ESC
    182      vt->parser.intermedlen = 0;
    183      if (!IS_STRING_STATE()) {
    184        vt->parser.state = NORMAL;
    185      }
    186      vt->parser.in_esc = true;
    187      continue;
    188    } else if (c == 0x07     // BEL, can stand for ST in OSC or DCS state
    189               && IS_STRING_STATE()) {} else if (c < 0x20) {  // other C0
    190      if (vt->parser.state == SOS) {
    191        continue;  // All other C0s permitted in SOS
    192      }
    193      if (IS_STRING_STATE()) {
    194        string_fragment(vt, string_start, (size_t)(bytes + pos - string_start), false,
    195                        VTERM_TERMINATOR_ST);
    196      }
    197      do_control(vt, c);
    198      if (IS_STRING_STATE()) {
    199        string_start = bytes + pos + 1;
    200      }
    201      continue;
    202    }
    203 
    204    size_t string_len = (size_t)(bytes + pos - string_start);
    205 
    206    if (vt->parser.in_esc) {
    207      // Hoist an ESC letter into a C1 if we're not in a string mode
    208      // Always accept ESC \ == ST even in string mode
    209      if (!vt->parser.intermedlen
    210          && c >= 0x40 && c < 0x60
    211          && ((!IS_STRING_STATE() || c == 0x5c))) {
    212        c += 0x40;
    213        c1_allowed = true;
    214        if (string_len) {
    215          assert(string_len > 0);
    216          string_len -= 1;
    217        }
    218        vt->parser.in_esc = false;
    219      } else {
    220        string_start = NULL;
    221        vt->parser.state = NORMAL;
    222      }
    223    }
    224 
    225    switch (vt->parser.state) {
    226    case CSI_LEADER:
    227      // Extract leader bytes 0x3c to 0x3f
    228      if (c >= 0x3c && c <= 0x3f) {
    229        if (vt->parser.v.csi.leaderlen < CSI_LEADER_MAX - 1) {
    230          vt->parser.v.csi.leader[vt->parser.v.csi.leaderlen++] = (char)c;
    231        }
    232        break;
    233      }
    234 
    235      vt->parser.v.csi.leader[vt->parser.v.csi.leaderlen] = 0;
    236 
    237      vt->parser.v.csi.argi = 0;
    238      vt->parser.v.csi.args[0] = CSI_ARG_MISSING;
    239      vt->parser.state = CSI_ARGS;
    240 
    241      FALLTHROUGH;
    242    case CSI_ARGS:
    243      // Numerical value of argument
    244      if (c >= '0' && c <= '9') {
    245        if (vt->parser.v.csi.args[vt->parser.v.csi.argi] == CSI_ARG_MISSING) {
    246          vt->parser.v.csi.args[vt->parser.v.csi.argi] = 0;
    247        }
    248        vt->parser.v.csi.args[vt->parser.v.csi.argi] *= 10;
    249        vt->parser.v.csi.args[vt->parser.v.csi.argi] += c - '0';
    250        break;
    251      }
    252      if (c == ':') {
    253        vt->parser.v.csi.args[vt->parser.v.csi.argi] |= CSI_ARG_FLAG_MORE;
    254        c = ';';
    255      }
    256      if (c == ';') {
    257        vt->parser.v.csi.argi++;
    258        vt->parser.v.csi.args[vt->parser.v.csi.argi] = CSI_ARG_MISSING;
    259        break;
    260      }
    261 
    262      vt->parser.v.csi.argi++;
    263      vt->parser.intermedlen = 0;
    264      vt->parser.state = CSI_INTERMED;
    265      FALLTHROUGH;
    266    case CSI_INTERMED:
    267      if (is_intermed(c)) {
    268        if (vt->parser.intermedlen < INTERMED_MAX - 1) {
    269          vt->parser.intermed[vt->parser.intermedlen++] = (char)c;
    270        }
    271        break;
    272      } else if (c == 0x1b) {
    273        // ESC in CSI cancels
    274      } else if (c >= 0x40 && c <= 0x7e) {
    275        vt->parser.intermed[vt->parser.intermedlen] = 0;
    276        do_csi(vt, (char)c);
    277      }
    278      // else was invalid CSI
    279 
    280      ENTER_NORMAL_STATE();
    281      break;
    282 
    283    case OSC_COMMAND:
    284      // Numerical value of command
    285      if (c >= '0' && c <= '9') {
    286        if (vt->parser.v.osc.command == -1) {
    287          vt->parser.v.osc.command = 0;
    288        } else {
    289          vt->parser.v.osc.command *= 10;
    290        }
    291        vt->parser.v.osc.command += c - '0';
    292        break;
    293      }
    294      if (c == ';') {
    295        vt->parser.state = OSC;
    296        string_start = bytes + pos + 1;
    297        break;
    298      }
    299 
    300      string_start = bytes + pos;
    301      string_len = 0;
    302      vt->parser.state = OSC;
    303      goto string_state;
    304 
    305    case DCS_COMMAND:
    306      if (vt->parser.v.dcs.commandlen < CSI_LEADER_MAX) {
    307        vt->parser.v.dcs.command[vt->parser.v.dcs.commandlen++] = (char)c;
    308      }
    309 
    310      if (c >= 0x40 && c <= 0x7e) {
    311        string_start = bytes + pos + 1;
    312        vt->parser.state = DCS_VTERM;
    313      }
    314      break;
    315 
    316 string_state:
    317    case OSC:
    318    case DCS_VTERM:
    319    case APC:
    320    case PM:
    321    case SOS:
    322      if (c == 0x07 || (c1_allowed && c == 0x9c)) {
    323        string_fragment(vt, string_start, string_len, true,
    324                        c == 0x07 ? VTERM_TERMINATOR_BEL : VTERM_TERMINATOR_ST);
    325        ENTER_NORMAL_STATE();
    326      }
    327      break;
    328 
    329    case NORMAL:
    330      if (vt->parser.in_esc) {
    331        if (is_intermed(c)) {
    332          if (vt->parser.intermedlen < INTERMED_MAX - 1) {
    333            vt->parser.intermed[vt->parser.intermedlen++] = (char)c;
    334          }
    335        } else if (c >= 0x30 && c < 0x7f) {
    336          do_escape(vt, (char)c);
    337          vt->parser.in_esc = 0;
    338          ENTER_NORMAL_STATE();
    339        } else {
    340          DEBUG_LOG("TODO: Unhandled byte %02x in Escape\n", c);
    341        }
    342        break;
    343      }
    344      if (c1_allowed && c >= 0x80 && c < 0xa0) {
    345        switch (c) {
    346        case 0x90:  // DCS
    347          vt->parser.string_initial = true;
    348          vt->parser.v.dcs.commandlen = 0;
    349          ENTER_STATE(DCS_COMMAND);
    350          break;
    351        case 0x98:  // SOS
    352          vt->parser.string_initial = true;
    353          ENTER_STATE(SOS);
    354          string_start = bytes + pos + 1;
    355          break;
    356        case 0x9b:  // CSI
    357          vt->parser.v.csi.leaderlen = 0;
    358          ENTER_STATE(CSI_LEADER);
    359          break;
    360        case 0x9d:  // OSC
    361          vt->parser.v.osc.command = -1;
    362          vt->parser.string_initial = true;
    363          ENTER_STATE(OSC_COMMAND);
    364          break;
    365        case 0x9e:  // PM
    366          vt->parser.string_initial = true;
    367          ENTER_STATE(PM);
    368          string_start = bytes + pos + 1;
    369          break;
    370        case 0x9f:  // APC
    371          vt->parser.string_initial = true;
    372          ENTER_STATE(APC);
    373          string_start = bytes + pos + 1;
    374          break;
    375        default:
    376          do_control(vt, c);
    377          break;
    378        }
    379      } else {
    380        size_t eaten = 0;
    381        if (vt->parser.callbacks && vt->parser.callbacks->text) {
    382          eaten = (size_t)(*vt->parser.callbacks->text)(bytes + pos, len - pos, vt->parser.cbdata);
    383        }
    384 
    385        if (!eaten) {
    386          DEBUG_LOG("libvterm: Text callback did not consume any input\n");
    387          // force it to make progress
    388          eaten = 1;
    389        }
    390 
    391        pos += (eaten - 1);  // we'll ++ it again in a moment
    392      }
    393      break;
    394    }
    395  }
    396 
    397  if (string_start) {
    398    size_t string_len = (size_t)(bytes + pos - string_start);
    399    if (string_len > 0) {
    400      if (vt->parser.in_esc) {
    401        string_len -= 1;
    402      }
    403      string_fragment(vt, string_start, string_len, false, VTERM_TERMINATOR_ST);
    404    }
    405  }
    406 
    407  return len;
    408 }
    409 
    410 void vterm_parser_set_callbacks(VTerm *vt, const VTermParserCallbacks *callbacks, void *user)
    411 {
    412  vt->parser.callbacks = callbacks;
    413  vt->parser.cbdata = user;
    414 }