parser.c (11461B)
1 #include <assert.h> 2 #include <stdio.h> 3 #include <string.h> 4 5 #include "nvim/vterm/parser.h" 6 #include "nvim/vterm/vterm.h" 7 #include "nvim/vterm/vterm_internal_defs.h" 8 9 #include "vterm/parser.c.generated.h" 10 11 #undef DEBUG_PARSER 12 13 static bool is_intermed(uint8_t c) 14 { 15 return c >= 0x20 && c <= 0x2f; 16 } 17 18 static void do_control(VTerm *vt, uint8_t control) 19 { 20 if (vt->parser.callbacks && vt->parser.callbacks->control) { 21 if ((*vt->parser.callbacks->control)(control, vt->parser.cbdata)) { 22 return; 23 } 24 } 25 26 DEBUG_LOG("libvterm: Unhandled control 0x%02x\n", control); 27 } 28 29 static void do_csi(VTerm *vt, char command) 30 { 31 #ifdef DEBUG_PARSER 32 printf("Parsed CSI args as:\n", arglen, args); 33 printf(" leader: %s\n", vt->parser.v.csi.leader); 34 for (int argi = 0; argi < vt->parser.v.csi.argi; argi++) { 35 printf(" %lu", CSI_ARG(vt->parser.v.csi.args[argi])); 36 if (!CSI_ARG_HAS_MORE(vt->parser.v.csi.args[argi])) { 37 printf("\n"); 38 } 39 printf(" intermed: %s\n", vt->parser.intermed); 40 } 41 #endif 42 43 if (vt->parser.callbacks && vt->parser.callbacks->csi) { 44 if ((*vt->parser.callbacks->csi)(vt->parser.v.csi.leaderlen ? vt->parser.v.csi.leader : NULL, 45 vt->parser.v.csi.args, 46 vt->parser.v.csi.argi, 47 vt->parser.intermedlen ? vt->parser.intermed : NULL, 48 command, 49 vt->parser.cbdata)) { 50 return; 51 } 52 } 53 54 DEBUG_LOG("libvterm: Unhandled CSI %c\n", command); 55 } 56 57 static void do_escape(VTerm *vt, char command) 58 { 59 char seq[INTERMED_MAX + 1]; 60 61 size_t len = (size_t)vt->parser.intermedlen; 62 strncpy(seq, vt->parser.intermed, len); // NOLINT(runtime/printf) 63 seq[len++] = command; 64 seq[len] = 0; 65 66 if (vt->parser.callbacks && vt->parser.callbacks->escape) { 67 if ((*vt->parser.callbacks->escape)(seq, len, vt->parser.cbdata)) { 68 return; 69 } 70 } 71 72 DEBUG_LOG("libvterm: Unhandled escape ESC 0x%02x\n", command); 73 } 74 75 static void string_fragment(VTerm *vt, const char *str, size_t len, bool final, 76 VTermTerminator terminator) 77 { 78 VTermStringFragment frag = { 79 .str = str, 80 .len = len, 81 .initial = vt->parser.string_initial, 82 .final = final, 83 .terminator = terminator, 84 }; 85 86 switch (vt->parser.state) { 87 case OSC: 88 if (vt->parser.callbacks && vt->parser.callbacks->osc) { 89 (*vt->parser.callbacks->osc)(vt->parser.v.osc.command, frag, vt->parser.cbdata); 90 } 91 break; 92 93 case DCS_VTERM: 94 if (vt->parser.callbacks && vt->parser.callbacks->dcs) { 95 (*vt->parser.callbacks->dcs)(vt->parser.v.dcs.command, (size_t)vt->parser.v.dcs.commandlen, 96 frag, 97 vt->parser.cbdata); 98 } 99 break; 100 101 case APC: 102 if (vt->parser.callbacks && vt->parser.callbacks->apc) { 103 (*vt->parser.callbacks->apc)(frag, vt->parser.cbdata); 104 } 105 break; 106 107 case PM: 108 if (vt->parser.callbacks && vt->parser.callbacks->pm) { 109 (*vt->parser.callbacks->pm)(frag, vt->parser.cbdata); 110 } 111 break; 112 113 case SOS: 114 if (vt->parser.callbacks && vt->parser.callbacks->sos) { 115 (*vt->parser.callbacks->sos)(frag, vt->parser.cbdata); 116 } 117 break; 118 119 case NORMAL: 120 case CSI_LEADER: 121 case CSI_ARGS: 122 case CSI_INTERMED: 123 case OSC_COMMAND: 124 case DCS_COMMAND: 125 return; 126 } 127 128 vt->parser.string_initial = false; 129 } 130 131 size_t vterm_input_write(VTerm *vt, const char *bytes, size_t len) 132 { 133 size_t pos = 0; 134 const char *string_start; 135 136 switch (vt->parser.state) { 137 case NORMAL: 138 case CSI_LEADER: 139 case CSI_ARGS: 140 case CSI_INTERMED: 141 case OSC_COMMAND: 142 case DCS_COMMAND: 143 string_start = NULL; 144 break; 145 case OSC: 146 case DCS_VTERM: 147 case APC: 148 case PM: 149 case SOS: 150 string_start = bytes; 151 break; 152 } 153 154 #define ENTER_STATE(st) do { vt->parser.state = st; string_start = NULL; } while (0) 155 #define ENTER_NORMAL_STATE() ENTER_STATE(NORMAL) 156 157 #define IS_STRING_STATE() (vt->parser.state >= OSC_COMMAND) 158 159 for (; pos < len; pos++) { 160 uint8_t c = (uint8_t)bytes[pos]; 161 bool c1_allowed = !vt->mode.utf8; 162 163 if (c == 0x00 || c == 0x7f) { // NUL, DEL 164 if (IS_STRING_STATE()) { 165 string_fragment(vt, string_start, (size_t)(bytes + pos - string_start), false, 166 VTERM_TERMINATOR_ST); 167 string_start = bytes + pos + 1; 168 } 169 if (vt->parser.emit_nul) { 170 do_control(vt, c); 171 } 172 continue; 173 } 174 if (c == 0x18 || c == 0x1a) { // CAN, SUB 175 vt->parser.in_esc = false; 176 ENTER_NORMAL_STATE(); 177 if (vt->parser.emit_nul) { 178 do_control(vt, c); 179 } 180 continue; 181 } else if (c == 0x1b) { // ESC 182 vt->parser.intermedlen = 0; 183 if (!IS_STRING_STATE()) { 184 vt->parser.state = NORMAL; 185 } 186 vt->parser.in_esc = true; 187 continue; 188 } else if (c == 0x07 // BEL, can stand for ST in OSC or DCS state 189 && IS_STRING_STATE()) {} else if (c < 0x20) { // other C0 190 if (vt->parser.state == SOS) { 191 continue; // All other C0s permitted in SOS 192 } 193 if (IS_STRING_STATE()) { 194 string_fragment(vt, string_start, (size_t)(bytes + pos - string_start), false, 195 VTERM_TERMINATOR_ST); 196 } 197 do_control(vt, c); 198 if (IS_STRING_STATE()) { 199 string_start = bytes + pos + 1; 200 } 201 continue; 202 } 203 204 size_t string_len = (size_t)(bytes + pos - string_start); 205 206 if (vt->parser.in_esc) { 207 // Hoist an ESC letter into a C1 if we're not in a string mode 208 // Always accept ESC \ == ST even in string mode 209 if (!vt->parser.intermedlen 210 && c >= 0x40 && c < 0x60 211 && ((!IS_STRING_STATE() || c == 0x5c))) { 212 c += 0x40; 213 c1_allowed = true; 214 if (string_len) { 215 assert(string_len > 0); 216 string_len -= 1; 217 } 218 vt->parser.in_esc = false; 219 } else { 220 string_start = NULL; 221 vt->parser.state = NORMAL; 222 } 223 } 224 225 switch (vt->parser.state) { 226 case CSI_LEADER: 227 // Extract leader bytes 0x3c to 0x3f 228 if (c >= 0x3c && c <= 0x3f) { 229 if (vt->parser.v.csi.leaderlen < CSI_LEADER_MAX - 1) { 230 vt->parser.v.csi.leader[vt->parser.v.csi.leaderlen++] = (char)c; 231 } 232 break; 233 } 234 235 vt->parser.v.csi.leader[vt->parser.v.csi.leaderlen] = 0; 236 237 vt->parser.v.csi.argi = 0; 238 vt->parser.v.csi.args[0] = CSI_ARG_MISSING; 239 vt->parser.state = CSI_ARGS; 240 241 FALLTHROUGH; 242 case CSI_ARGS: 243 // Numerical value of argument 244 if (c >= '0' && c <= '9') { 245 if (vt->parser.v.csi.args[vt->parser.v.csi.argi] == CSI_ARG_MISSING) { 246 vt->parser.v.csi.args[vt->parser.v.csi.argi] = 0; 247 } 248 vt->parser.v.csi.args[vt->parser.v.csi.argi] *= 10; 249 vt->parser.v.csi.args[vt->parser.v.csi.argi] += c - '0'; 250 break; 251 } 252 if (c == ':') { 253 vt->parser.v.csi.args[vt->parser.v.csi.argi] |= CSI_ARG_FLAG_MORE; 254 c = ';'; 255 } 256 if (c == ';') { 257 vt->parser.v.csi.argi++; 258 vt->parser.v.csi.args[vt->parser.v.csi.argi] = CSI_ARG_MISSING; 259 break; 260 } 261 262 vt->parser.v.csi.argi++; 263 vt->parser.intermedlen = 0; 264 vt->parser.state = CSI_INTERMED; 265 FALLTHROUGH; 266 case CSI_INTERMED: 267 if (is_intermed(c)) { 268 if (vt->parser.intermedlen < INTERMED_MAX - 1) { 269 vt->parser.intermed[vt->parser.intermedlen++] = (char)c; 270 } 271 break; 272 } else if (c == 0x1b) { 273 // ESC in CSI cancels 274 } else if (c >= 0x40 && c <= 0x7e) { 275 vt->parser.intermed[vt->parser.intermedlen] = 0; 276 do_csi(vt, (char)c); 277 } 278 // else was invalid CSI 279 280 ENTER_NORMAL_STATE(); 281 break; 282 283 case OSC_COMMAND: 284 // Numerical value of command 285 if (c >= '0' && c <= '9') { 286 if (vt->parser.v.osc.command == -1) { 287 vt->parser.v.osc.command = 0; 288 } else { 289 vt->parser.v.osc.command *= 10; 290 } 291 vt->parser.v.osc.command += c - '0'; 292 break; 293 } 294 if (c == ';') { 295 vt->parser.state = OSC; 296 string_start = bytes + pos + 1; 297 break; 298 } 299 300 string_start = bytes + pos; 301 string_len = 0; 302 vt->parser.state = OSC; 303 goto string_state; 304 305 case DCS_COMMAND: 306 if (vt->parser.v.dcs.commandlen < CSI_LEADER_MAX) { 307 vt->parser.v.dcs.command[vt->parser.v.dcs.commandlen++] = (char)c; 308 } 309 310 if (c >= 0x40 && c <= 0x7e) { 311 string_start = bytes + pos + 1; 312 vt->parser.state = DCS_VTERM; 313 } 314 break; 315 316 string_state: 317 case OSC: 318 case DCS_VTERM: 319 case APC: 320 case PM: 321 case SOS: 322 if (c == 0x07 || (c1_allowed && c == 0x9c)) { 323 string_fragment(vt, string_start, string_len, true, 324 c == 0x07 ? VTERM_TERMINATOR_BEL : VTERM_TERMINATOR_ST); 325 ENTER_NORMAL_STATE(); 326 } 327 break; 328 329 case NORMAL: 330 if (vt->parser.in_esc) { 331 if (is_intermed(c)) { 332 if (vt->parser.intermedlen < INTERMED_MAX - 1) { 333 vt->parser.intermed[vt->parser.intermedlen++] = (char)c; 334 } 335 } else if (c >= 0x30 && c < 0x7f) { 336 do_escape(vt, (char)c); 337 vt->parser.in_esc = 0; 338 ENTER_NORMAL_STATE(); 339 } else { 340 DEBUG_LOG("TODO: Unhandled byte %02x in Escape\n", c); 341 } 342 break; 343 } 344 if (c1_allowed && c >= 0x80 && c < 0xa0) { 345 switch (c) { 346 case 0x90: // DCS 347 vt->parser.string_initial = true; 348 vt->parser.v.dcs.commandlen = 0; 349 ENTER_STATE(DCS_COMMAND); 350 break; 351 case 0x98: // SOS 352 vt->parser.string_initial = true; 353 ENTER_STATE(SOS); 354 string_start = bytes + pos + 1; 355 break; 356 case 0x9b: // CSI 357 vt->parser.v.csi.leaderlen = 0; 358 ENTER_STATE(CSI_LEADER); 359 break; 360 case 0x9d: // OSC 361 vt->parser.v.osc.command = -1; 362 vt->parser.string_initial = true; 363 ENTER_STATE(OSC_COMMAND); 364 break; 365 case 0x9e: // PM 366 vt->parser.string_initial = true; 367 ENTER_STATE(PM); 368 string_start = bytes + pos + 1; 369 break; 370 case 0x9f: // APC 371 vt->parser.string_initial = true; 372 ENTER_STATE(APC); 373 string_start = bytes + pos + 1; 374 break; 375 default: 376 do_control(vt, c); 377 break; 378 } 379 } else { 380 size_t eaten = 0; 381 if (vt->parser.callbacks && vt->parser.callbacks->text) { 382 eaten = (size_t)(*vt->parser.callbacks->text)(bytes + pos, len - pos, vt->parser.cbdata); 383 } 384 385 if (!eaten) { 386 DEBUG_LOG("libvterm: Text callback did not consume any input\n"); 387 // force it to make progress 388 eaten = 1; 389 } 390 391 pos += (eaten - 1); // we'll ++ it again in a moment 392 } 393 break; 394 } 395 } 396 397 if (string_start) { 398 size_t string_len = (size_t)(bytes + pos - string_start); 399 if (string_len > 0) { 400 if (vt->parser.in_esc) { 401 string_len -= 1; 402 } 403 string_fragment(vt, string_start, string_len, false, VTERM_TERMINATOR_ST); 404 } 405 } 406 407 return len; 408 } 409 410 void vterm_parser_set_callbacks(VTerm *vt, const VTermParserCallbacks *callbacks, void *user) 411 { 412 vt->parser.callbacks = callbacks; 413 vt->parser.cbdata = user; 414 }