content.ts (14107B)
1 import {Fragment} from "./fragment" 2 import {NodeType} from "./schema" 3 4 type MatchEdge = {type: NodeType, next: ContentMatch} 5 6 /// Instances of this class represent a match state of a node type's 7 /// [content expression](#model.NodeSpec.content), and can be used to 8 /// find out whether further content matches here, and whether a given 9 /// position is a valid end of the node. 10 export class ContentMatch { 11 /// @internal 12 readonly next: MatchEdge[] = [] 13 /// @internal 14 readonly wrapCache: (NodeType | readonly NodeType[] | null)[] = [] 15 16 /// @internal 17 constructor( 18 /// True when this match state represents a valid end of the node. 19 readonly validEnd: boolean 20 ) {} 21 22 /// @internal 23 static parse(string: string, nodeTypes: {readonly [name: string]: NodeType}): ContentMatch { 24 let stream = new TokenStream(string, nodeTypes) 25 if (stream.next == null) return ContentMatch.empty 26 let expr = parseExpr(stream) 27 if (stream.next) stream.err("Unexpected trailing text") 28 let match = dfa(nfa(expr)) 29 checkForDeadEnds(match, stream) 30 return match 31 } 32 33 /// Match a node type, returning a match after that node if 34 /// successful. 35 matchType(type: NodeType): ContentMatch | null { 36 for (let i = 0; i < this.next.length; i++) 37 if (this.next[i].type == type) return this.next[i].next 38 return null 39 } 40 41 /// Try to match a fragment. Returns the resulting match when 42 /// successful. 43 matchFragment(frag: Fragment, start = 0, end = frag.childCount): ContentMatch | null { 44 let cur: ContentMatch | null = this 45 for (let i = start; cur && i < end; i++) 46 cur = cur.matchType(frag.child(i).type) 47 return cur 48 } 49 50 /// @internal 51 get inlineContent() { 52 return this.next.length != 0 && this.next[0].type.isInline 53 } 54 55 /// Get the first matching node type at this match position that can 56 /// be generated. 57 get defaultType(): NodeType | null { 58 for (let i = 0; i < this.next.length; i++) { 59 let {type} = this.next[i] 60 if (!(type.isText || type.hasRequiredAttrs())) return type 61 } 62 return null 63 } 64 65 /// @internal 66 compatible(other: ContentMatch) { 67 for (let i = 0; i < this.next.length; i++) 68 for (let j = 0; j < other.next.length; j++) 69 if (this.next[i].type == other.next[j].type) return true 70 return false 71 } 72 73 /// Try to match the given fragment, and if that fails, see if it can 74 /// be made to match by inserting nodes in front of it. When 75 /// successful, return a fragment of inserted nodes (which may be 76 /// empty if nothing had to be inserted). When `toEnd` is true, only 77 /// return a fragment if the resulting match goes to the end of the 78 /// content expression. 79 fillBefore(after: Fragment, toEnd = false, startIndex = 0): Fragment | null { 80 let seen: ContentMatch[] = [this] 81 function search(match: ContentMatch, types: readonly NodeType[]): Fragment | null { 82 let finished = match.matchFragment(after, startIndex) 83 if (finished && (!toEnd || finished.validEnd)) 84 return Fragment.from(types.map(tp => tp.createAndFill()!)) 85 86 for (let i = 0; i < match.next.length; i++) { 87 let {type, next} = match.next[i] 88 if (!(type.isText || type.hasRequiredAttrs()) && seen.indexOf(next) == -1) { 89 seen.push(next) 90 let found = search(next, types.concat(type)) 91 if (found) return found 92 } 93 } 94 return null 95 } 96 97 return search(this, []) 98 } 99 100 /// Find a set of wrapping node types that would allow a node of the 101 /// given type to appear at this position. The result may be empty 102 /// (when it fits directly) and will be null when no such wrapping 103 /// exists. 104 findWrapping(target: NodeType): readonly NodeType[] | null { 105 for (let i = 0; i < this.wrapCache.length; i += 2) 106 if (this.wrapCache[i] == target) return this.wrapCache[i + 1] as (readonly NodeType[] | null) 107 let computed = this.computeWrapping(target) 108 this.wrapCache.push(target, computed) 109 return computed 110 } 111 112 /// @internal 113 computeWrapping(target: NodeType): readonly NodeType[] | null { 114 type Active = {match: ContentMatch, type: NodeType | null, via: Active | null} 115 let seen = Object.create(null), active: Active[] = [{match: this, type: null, via: null}] 116 while (active.length) { 117 let current = active.shift()!, match = current.match 118 if (match.matchType(target)) { 119 let result: NodeType[] = [] 120 for (let obj: Active = current; obj.type; obj = obj.via!) 121 result.push(obj.type) 122 return result.reverse() 123 } 124 for (let i = 0; i < match.next.length; i++) { 125 let {type, next} = match.next[i] 126 if (!type.isLeaf && !type.hasRequiredAttrs() && !(type.name in seen) && (!current.type || next.validEnd)) { 127 active.push({match: type.contentMatch, type, via: current}) 128 seen[type.name] = true 129 } 130 } 131 } 132 return null 133 } 134 135 /// The number of outgoing edges this node has in the finite 136 /// automaton that describes the content expression. 137 get edgeCount() { 138 return this.next.length 139 } 140 141 /// Get the _n_th outgoing edge from this node in the finite 142 /// automaton that describes the content expression. 143 edge(n: number): MatchEdge { 144 if (n >= this.next.length) throw new RangeError(`There's no ${n}th edge in this content match`) 145 return this.next[n] 146 } 147 148 /// @internal 149 toString() { 150 let seen: ContentMatch[] = [] 151 function scan(m: ContentMatch) { 152 seen.push(m) 153 for (let i = 0; i < m.next.length; i++) 154 if (seen.indexOf(m.next[i].next) == -1) scan(m.next[i].next) 155 } 156 scan(this) 157 return seen.map((m, i) => { 158 let out = i + (m.validEnd ? "*" : " ") + " " 159 for (let i = 0; i < m.next.length; i++) 160 out += (i ? ", " : "") + m.next[i].type.name + "->" + seen.indexOf(m.next[i].next) 161 return out 162 }).join("\n") 163 } 164 165 /// @internal 166 static empty = new ContentMatch(true) 167 } 168 169 class TokenStream { 170 inline: boolean | null = null 171 pos = 0 172 tokens: string[] 173 174 constructor( 175 readonly string: string, 176 readonly nodeTypes: {readonly [name: string]: NodeType} 177 ) { 178 this.tokens = string.split(/\s*(?=\b|\W|$)/) 179 if (this.tokens[this.tokens.length - 1] == "") this.tokens.pop() 180 if (this.tokens[0] == "") this.tokens.shift() 181 } 182 183 get next() { return this.tokens[this.pos] } 184 185 eat(tok: string) { return this.next == tok && (this.pos++ || true) } 186 187 err(str: string): never { throw new SyntaxError(str + " (in content expression '" + this.string + "')") } 188 } 189 190 type Expr = 191 {type: "choice", exprs: Expr[]} | 192 {type: "seq", exprs: Expr[]} | 193 {type: "plus", expr: Expr} | 194 {type: "star", expr: Expr} | 195 {type: "opt", expr: Expr} | 196 {type: "range", min: number, max: number, expr: Expr} | 197 {type: "name", value: NodeType} 198 199 function parseExpr(stream: TokenStream): Expr { 200 let exprs: Expr[] = [] 201 do { exprs.push(parseExprSeq(stream)) } 202 while (stream.eat("|")) 203 return exprs.length == 1 ? exprs[0] : {type: "choice", exprs} 204 } 205 206 function parseExprSeq(stream: TokenStream): Expr { 207 let exprs: Expr[] = [] 208 do { exprs.push(parseExprSubscript(stream)) } 209 while (stream.next && stream.next != ")" && stream.next != "|") 210 return exprs.length == 1 ? exprs[0] : {type: "seq", exprs} 211 } 212 213 function parseExprSubscript(stream: TokenStream): Expr { 214 let expr = parseExprAtom(stream) 215 for (;;) { 216 if (stream.eat("+")) 217 expr = {type: "plus", expr} 218 else if (stream.eat("*")) 219 expr = {type: "star", expr} 220 else if (stream.eat("?")) 221 expr = {type: "opt", expr} 222 else if (stream.eat("{")) 223 expr = parseExprRange(stream, expr) 224 else break 225 } 226 return expr 227 } 228 229 function parseNum(stream: TokenStream) { 230 if (/\D/.test(stream.next)) stream.err("Expected number, got '" + stream.next + "'") 231 let result = Number(stream.next) 232 stream.pos++ 233 return result 234 } 235 236 function parseExprRange(stream: TokenStream, expr: Expr): Expr { 237 let min = parseNum(stream), max = min 238 if (stream.eat(",")) { 239 if (stream.next != "}") max = parseNum(stream) 240 else max = -1 241 } 242 if (!stream.eat("}")) stream.err("Unclosed braced range") 243 return {type: "range", min, max, expr} 244 } 245 246 function resolveName(stream: TokenStream, name: string): readonly NodeType[] { 247 let types = stream.nodeTypes, type = types[name] 248 if (type) return [type] 249 let result: NodeType[] = [] 250 for (let typeName in types) { 251 let type = types[typeName] 252 if (type.isInGroup(name)) result.push(type) 253 } 254 if (result.length == 0) stream.err("No node type or group '" + name + "' found") 255 return result 256 } 257 258 function parseExprAtom(stream: TokenStream): Expr { 259 if (stream.eat("(")) { 260 let expr = parseExpr(stream) 261 if (!stream.eat(")")) stream.err("Missing closing paren") 262 return expr 263 } else if (!/\W/.test(stream.next)) { 264 let exprs = resolveName(stream, stream.next).map(type => { 265 if (stream.inline == null) stream.inline = type.isInline 266 else if (stream.inline != type.isInline) stream.err("Mixing inline and block content") 267 return {type: "name", value: type} as Expr 268 }) 269 stream.pos++ 270 return exprs.length == 1 ? exprs[0] : {type: "choice", exprs} 271 } else { 272 stream.err("Unexpected token '" + stream.next + "'") 273 } 274 } 275 276 // The code below helps compile a regular-expression-like language 277 // into a deterministic finite automaton. For a good introduction to 278 // these concepts, see https://swtch.com/~rsc/regexp/regexp1.html 279 280 type Edge = {term: NodeType | undefined, to: number | undefined} 281 282 // Construct an NFA from an expression as returned by the parser. The 283 // NFA is represented as an array of states, which are themselves 284 // arrays of edges, which are `{term, to}` objects. The first state is 285 // the entry state and the last node is the success state. 286 // 287 // Note that unlike typical NFAs, the edge ordering in this one is 288 // significant, in that it is used to contruct filler content when 289 // necessary. 290 function nfa(expr: Expr): Edge[][] { 291 let nfa: Edge[][] = [[]] 292 connect(compile(expr, 0), node()) 293 return nfa 294 295 function node() { return nfa.push([]) - 1 } 296 function edge(from: number, to?: number, term?: NodeType) { 297 let edge = {term, to} 298 nfa[from].push(edge) 299 return edge 300 } 301 function connect(edges: Edge[], to: number) { 302 edges.forEach(edge => edge.to = to) 303 } 304 305 function compile(expr: Expr, from: number): Edge[] { 306 if (expr.type == "choice") { 307 return expr.exprs.reduce((out, expr) => out.concat(compile(expr, from)), [] as Edge[]) 308 } else if (expr.type == "seq") { 309 for (let i = 0;; i++) { 310 let next = compile(expr.exprs[i], from) 311 if (i == expr.exprs.length - 1) return next 312 connect(next, from = node()) 313 } 314 } else if (expr.type == "star") { 315 let loop = node() 316 edge(from, loop) 317 connect(compile(expr.expr, loop), loop) 318 return [edge(loop)] 319 } else if (expr.type == "plus") { 320 let loop = node() 321 connect(compile(expr.expr, from), loop) 322 connect(compile(expr.expr, loop), loop) 323 return [edge(loop)] 324 } else if (expr.type == "opt") { 325 return [edge(from)].concat(compile(expr.expr, from)) 326 } else if (expr.type == "range") { 327 let cur = from 328 for (let i = 0; i < expr.min; i++) { 329 let next = node() 330 connect(compile(expr.expr, cur), next) 331 cur = next 332 } 333 if (expr.max == -1) { 334 connect(compile(expr.expr, cur), cur) 335 } else { 336 for (let i = expr.min; i < expr.max; i++) { 337 let next = node() 338 edge(cur, next) 339 connect(compile(expr.expr, cur), next) 340 cur = next 341 } 342 } 343 return [edge(cur)] 344 } else if (expr.type == "name") { 345 return [edge(from, undefined, expr.value)] 346 } else { 347 throw new Error("Unknown expr type") 348 } 349 } 350 } 351 352 function cmp(a: number, b: number) { return b - a } 353 354 // Get the set of nodes reachable by null edges from `node`. Omit 355 // nodes with only a single null-out-edge, since they may lead to 356 // needless duplicated nodes. 357 function nullFrom(nfa: Edge[][], node: number): readonly number[] { 358 let result: number[] = [] 359 scan(node) 360 return result.sort(cmp) 361 362 function scan(node: number): void { 363 let edges = nfa[node] 364 if (edges.length == 1 && !edges[0].term) return scan(edges[0].to!) 365 result.push(node) 366 for (let i = 0; i < edges.length; i++) { 367 let {term, to} = edges[i] 368 if (!term && result.indexOf(to!) == -1) scan(to!) 369 } 370 } 371 } 372 373 // Compiles an NFA as produced by `nfa` into a DFA, modeled as a set 374 // of state objects (`ContentMatch` instances) with transitions 375 // between them. 376 function dfa(nfa: Edge[][]): ContentMatch { 377 let labeled = Object.create(null) 378 return explore(nullFrom(nfa, 0)) 379 380 function explore(states: readonly number[]) { 381 let out: [NodeType, number[]][] = [] 382 states.forEach(node => { 383 nfa[node].forEach(({term, to}) => { 384 if (!term) return 385 let set: number[] | undefined 386 for (let i = 0; i < out.length; i++) if (out[i][0] == term) set = out[i][1] 387 nullFrom(nfa, to!).forEach(node => { 388 if (!set) out.push([term, set = []]) 389 if (set.indexOf(node) == -1) set.push(node) 390 }) 391 }) 392 }) 393 let state = labeled[states.join(",")] = new ContentMatch(states.indexOf(nfa.length - 1) > -1) 394 for (let i = 0; i < out.length; i++) { 395 let states = out[i][1].sort(cmp) 396 state.next.push({type: out[i][0], next: labeled[states.join(",")] || explore(states)}) 397 } 398 return state 399 } 400 } 401 402 function checkForDeadEnds(match: ContentMatch, stream: TokenStream) { 403 for (let i = 0, work = [match]; i < work.length; i++) { 404 let state = work[i], dead = !state.validEnd, nodes: string[] = [] 405 for (let j = 0; j < state.next.length; j++) { 406 let {type, next} = state.next[j] 407 nodes.push(type.name) 408 if (dead && !(type.isText || type.hasRequiredAttrs())) dead = false 409 if (work.indexOf(next) == -1) work.push(next) 410 } 411 if (dead) stream.err("Only non-generatable nodes (" + nodes.join(", ") + ") in a required position (see https://prosemirror.net/docs/guide/#generatable)") 412 } 413 }