from_markdown.ts (10408B)
// @ts-ignore
import MarkdownIt from "markdown-it"
import Token from "markdown-it/lib/token.mjs"
import {schema} from "./schema"
import {Mark, MarkType, Node, Attrs, Schema, NodeType} from "prosemirror-model"

// Signature shared by every per-token handler: it receives the running
// parse state, the token being handled, the token array that token
// belongs to, and the token's index in that array.
type TokenHandler = (stat: MarkdownParseState, token: Token, tokens: Token[], i: number) => void

// When both nodes are text nodes carrying the same mark set, returns a
// single text node holding their concatenated text. Returns undefined
// otherwise.
function maybeMerge(a: Node, b: Node): Node | undefined {
  if (a.isText && b.isText && Mark.sameSet(a.marks, b.marks))
    return (a as any).withText(a.text! + b.text!)
  return undefined
}

// Object used to track the context of a running parse.
class MarkdownParseState {
  // Stack of nodes that are currently being built. The last entry is
  // the innermost open node; `content` collects its children and
  // `marks` is the set of marks applied to text added to it.
  stack: {type: NodeType, attrs: Attrs | null, content: Node[], marks: readonly Mark[]}[]

  constructor(
    readonly schema: Schema,
    readonly tokenHandlers: {[token: string]: TokenHandler}
  ) {
    // Start with the schema's top node type open, so that block
    // content has something to be added to.
    this.stack = [{type: schema.topNodeType, attrs: null, content: [], marks: Mark.none}]
  }

  // The innermost open node (undefined when the stack is empty).
  top() {
    return this.stack[this.stack.length - 1]
  }

  // Append a finished node to the innermost open node. Does nothing
  // when the stack is empty.
  push(elt: Node) {
    if (this.stack.length) this.top().content.push(elt)
  }

  // Adds the given text to the current position in the document,
  // using the current marks as styling.
  addText(text: string) {
    if (!text) return
    let top = this.top(), nodes = top.content, last = nodes[nodes.length - 1]
    let node = this.schema.text(text, top.marks), merged
    // Join with the preceding text node when the marks are the same,
    // instead of producing two adjacent text nodes.
    if (last && (merged = maybeMerge(last, node))) nodes[nodes.length - 1] = merged
    else nodes.push(node)
  }

  // Adds the given mark to the set of active marks.
  openMark(mark: Mark) {
    let top = this.top()
    top.marks = mark.addToSet(top.marks)
  }

  // Removes the given mark from the set of active marks.
  closeMark(mark: MarkType) {
    let top = this.top()
    top.marks = mark.removeFromSet(top.marks)
  }

  // Run the registered handler for each token, in order. Throws when
  // a token type has no handler.
  parseTokens(toks: Token[]) {
    for (let i = 0; i < toks.length; i++) {
      let tok = toks[i]
      let handler = this.tokenHandlers[tok.type]
      if (!handler)
        throw new Error("Token type `" + tok.type + "` not supported by Markdown parser")
      handler(this, tok, toks, i)
    }
  }

  // Add a node at the current position. Returns the created node, or
  // null when `createAndFill` could not produce one.
  addNode(type: NodeType, attrs: Attrs | null, content?: readonly Node[]) {
    let top = this.top()
    // `top` is undefined when the outermost node has just been popped
    // by `closeNode`.
    let node = type.createAndFill(attrs, content, top ? top.marks : [])
    if (!node) return null
    this.push(node)
    return node
  }

  // Wrap subsequent content in a node of the given type.
  openNode(type: NodeType, attrs: Attrs | null) {
    this.stack.push({type: type, attrs: attrs, content: [], marks: Mark.none})
  }

  // Close and return the node that is currently on top of the stack.
  closeNode() {
    let info = this.stack.pop()!
    return this.addNode(info.type, info.attrs, info.content)
  }
}

// Compute the attributes for the node or mark created from `token`,
// preferring `getAttrs` over `attrs` when both are given.
function attrs(spec: ParseSpec, token: Token, tokens: Token[], i: number) {
  if (spec.getAttrs) return spec.getAttrs(token, tokens, i)
  // For backwards compatibility when `attrs` is a Function
  else if (spec.attrs instanceof Function) return spec.attrs(token)
  else return spec.attrs
}

// Code content is represented as a single token with a `content`
// property in Markdown-it.
function noCloseToken(spec: ParseSpec, type: string) {
  return spec.noCloseToken || type == "code_inline" || type == "code_block" || type == "fence"
}

// Drop a single trailing newline from the string, if it has one.
function withoutTrailingNewline(str: string) {
  return str[str.length - 1] == "\n" ? str.slice(0, str.length - 1) : str
}

// Handler installed for ignored tokens.
function noOp() {}

// Build the table of token handlers described by the given parse
// specs. Throws a RangeError for a spec that sets none of `block`,
// `node`, `mark` or `ignore`.
function tokenHandlers(schema: Schema, tokens: {[token: string]: ParseSpec}) {
  let handlers: {[token: string]: TokenHandler} = Object.create(null)
  for (let type in tokens) {
    let spec = tokens[type]
    if (spec.block) {
      let nodeType = schema.nodeType(spec.block)
      if (noCloseToken(spec, type)) {
        // A single token carries the whole block; its `content` is
        // the block's text.
        handlers[type] = (state, tok, tokens, i) => {
          state.openNode(nodeType, attrs(spec, tok, tokens, i))
          state.addText(withoutTrailingNewline(tok.content))
          state.closeNode()
        }
      } else {
        handlers[type + "_open"] = (state, tok, tokens, i) => state.openNode(nodeType, attrs(spec, tok, tokens, i))
        handlers[type + "_close"] = state => state.closeNode()
      }
    } else if (spec.node) {
      let nodeType = schema.nodeType(spec.node)
      handlers[type] = (state, tok, tokens, i) => state.addNode(nodeType, attrs(spec, tok, tokens, i))
    } else if (spec.mark) {
      let markType = schema.marks[spec.mark]
      if (noCloseToken(spec, type)) {
        handlers[type] = (state, tok, tokens, i) => {
          state.openMark(markType.create(attrs(spec, tok, tokens, i)))
          state.addText(withoutTrailingNewline(tok.content))
          state.closeMark(markType)
        }
      } else {
        handlers[type + "_open"] = (state, tok, tokens, i) => state.openMark(markType.create(attrs(spec, tok, tokens, i)))
        handlers[type + "_close"] = state => state.closeMark(markType)
      }
    } else if (spec.ignore) {
      if (noCloseToken(spec, type)) {
        handlers[type] = noOp
      } else {
        handlers[type + "_open"] = noOp
        handlers[type + "_close"] = noOp
      }
    } else {
      throw new RangeError("Unrecognized parsing spec " + JSON.stringify(spec))
    }
  }

  // `text` and `inline` are always handled here, replacing any handler
  // the specs produced for those names. `softbreak` is only a default.
  handlers.text = (state, tok) => state.addText(tok.content)
  // Treat a token without a `children` array as having no children,
  // rather than failing inside `parseTokens`.
  handlers.inline = (state, tok) => state.parseTokens(tok.children || [])
  handlers.softbreak = handlers.softbreak || (state => state.addText(" "))

  return handlers
}

/// Object type used to specify how Markdown tokens should be parsed.
export interface ParseSpec {
  /// This token maps to a single node, whose type can be looked up
  /// in the schema under the given name. Exactly one of `node`,
  /// `block`, or `mark` must be set.
  node?: string

  /// This token (unless `noCloseToken` is true) comes in `_open`
  /// and `_close` variants (which are appended to the base token
  /// name provided as the object property), and wraps a block of
  /// content. The block should be wrapped in a node of the type
  /// named by the property's value. If the token does not have
  /// `_open` or `_close`, use the `noCloseToken` option.
  block?: string

  /// This token (again, unless `noCloseToken` is true) also comes
  /// in `_open` and `_close` variants, but should add a mark
  /// (named by the value) to its content, rather than wrapping it
  /// in a node.
  mark?: string

  /// Attributes for the node or mark. When `getAttrs` is provided,
  /// it takes precedence.
  attrs?: Attrs | null

  /// A function used to compute the attributes for the node or mark
  /// that takes a [markdown-it
  /// token](https://markdown-it.github.io/markdown-it/#Token) and
  /// returns an attribute object.
  getAttrs?: (token: Token, tokenStream: Token[], index: number) => Attrs | null

  /// Indicates that the [markdown-it
  /// token](https://markdown-it.github.io/markdown-it/#Token) has
  /// no `_open` or `_close` for the nodes. This defaults to `true`
  /// for `code_inline`, `code_block` and `fence`.
  noCloseToken?: boolean

  /// When true, ignore content for the matched token.
  ignore?: boolean
}

/// A configuration of a Markdown parser. Such a parser uses
/// [markdown-it](https://github.com/markdown-it/markdown-it) to
/// tokenize a file, and then runs the custom rules it is given over
/// the tokens to create a ProseMirror document tree.
export class MarkdownParser {
  /// @internal
  tokenHandlers: {[token: string]: TokenHandler}

  /// Create a parser with the given configuration. You can configure
  /// the markdown-it parser to parse the dialect you want, and provide
  /// a description of the ProseMirror entities those tokens map to in
  /// the `tokens` object, which maps token names to descriptions of
  /// what to do with them. Such a description is an object, and may
  /// have the following properties:
  constructor(
    /// The parser's document schema.
    readonly schema: Schema,
    /// This parser's markdown-it tokenizer.
    readonly tokenizer: MarkdownIt,
    /// The value of the `tokens` object used to construct this
    /// parser. Can be useful to copy and modify to base other parsers
    /// on.
    readonly tokens: {[name: string]: ParseSpec}
  ) {
    this.tokenHandlers = tokenHandlers(schema, tokens)
  }

  /// Parse a string as [CommonMark](http://commonmark.org/) markup,
  /// and create a ProseMirror document as prescribed by this parser's
  /// rules.
  ///
  /// The second argument, when given, is passed through to the
  /// [Markdown
  /// parser](https://markdown-it.github.io/markdown-it/#MarkdownIt.parse).
  parse(text: string, markdownEnv: Object = {}) {
    let state = new MarkdownParseState(this.schema, this.tokenHandlers), doc
    state.parseTokens(this.tokenizer.parse(text, markdownEnv))
    // Close every node that is still open. The last one closed is the
    // top node, which becomes the document.
    do { doc = state.closeNode() } while (state.stack.length)
    // Fall back to a filled, empty top node when the top node could
    // not be created from the parsed content.
    return doc || this.schema.topNodeType.createAndFill()!
  }
}

// Returns the `hidden` flag of the first token after index `i` whose
// type is not `list_item_open`, or false when there is no such token.
// NOTE(review): presumably that token is the first item's
// `paragraph_open`, which markdown-it hides in tight lists; confirm.
function listIsTight(tokens: readonly Token[], i: number) {
  while (++i < tokens.length)
    if (tokens[i].type != "list_item_open") return tokens[i].hidden
  return false
}

// Read the starting number of an ordered list from the token's
// `start` attribute. Falls back to 1 only when the attribute is
// missing, empty, or not a number, so that a list starting at 0 keeps
// its number.
function listStart(tok: Token): number {
  let raw = tok.attrGet("start")
  if (!raw) return 1
  let start = Number(raw)
  return Number.isNaN(start) ? 1 : start
}

// Flatten a sequence of inline tokens to plain text: `text` and
// `code_inline` tokens contribute their content, nested images
// contribute their own flattened children, line breaks become
// newlines, and every other token (such as emphasis delimiters) is
// skipped.
function inlineText(tokens: readonly Token[] | null): string {
  let text = ""
  if (!tokens) return text
  for (let tok of tokens) {
    if (tok.type == "text" || tok.type == "code_inline") text += tok.content
    else if (tok.type == "image") text += inlineText(tok.children)
    else if (tok.type == "softbreak" || tok.type == "hardbreak") text += "\n"
  }
  return text
}

/// A parser parsing unextended [CommonMark](http://commonmark.org/),
/// without inline HTML, and producing a document in the basic schema.
export const defaultMarkdownParser = new MarkdownParser(schema, MarkdownIt("commonmark", {html: false}), {
  blockquote: {block: "blockquote"},
  paragraph: {block: "paragraph"},
  list_item: {block: "list_item"},
  bullet_list: {block: "bullet_list", getAttrs: (_, tokens, i) => ({tight: listIsTight(tokens, i)})},
  ordered_list: {block: "ordered_list", getAttrs: (tok, tokens, i) => ({
    order: listStart(tok),
    tight: listIsTight(tokens, i)
  })},
  // The heading level is the digit in the tag name (`h1` to `h6`).
  heading: {block: "heading", getAttrs: tok => ({level: +tok.tag.slice(1)})},
  code_block: {block: "code_block", noCloseToken: true},
  fence: {block: "code_block", getAttrs: tok => ({params: tok.info || ""}), noCloseToken: true},
  hr: {node: "horizontal_rule"},
  image: {node: "image", getAttrs: tok => ({
    src: tok.attrGet("src"),
    title: tok.attrGet("title") || null,
    // Use the text of all children, so that an image description
    // containing inline markup keeps its full alt text.
    alt: inlineText(tok.children) || null
  })},
  hardbreak: {node: "hard_break"},

  em: {mark: "em"},
  strong: {mark: "strong"},
  link: {mark: "link", getAttrs: tok => ({
    href: tok.attrGet("href"),
    title: tok.attrGet("title") || null
  })},
  code_inline: {mark: "code", noCloseToken: true}
})