to_markdown.ts (18073B)
import type {Node, Mark} from "prosemirror-model"

/// Describes how a single mark type is written out as Markdown.
type MarkSerializerSpec = {
  /// The string that should appear before a piece of content marked
  /// by this mark, either directly or as a function that returns an
  /// appropriate string.
  open: string | ((state: MarkdownSerializerState, mark: Mark, parent: Node, index: number) => string),
  /// The string that should appear after a piece of content marked by
  /// this mark.
  close: string | ((state: MarkdownSerializerState, mark: Mark, parent: Node, index: number) => string),
  /// When `true`, this indicates that the order in which the mark's
  /// opening and closing syntax appears relative to other mixable
  /// marks can be varied. (For example, you can say `**a *b***` and
  /// `*a **b***`, but not `` `a *b*` ``.)
  mixable?: boolean,
  /// When enabled, causes the serializer to move enclosing whitespace
  /// from inside the marks to outside the marks. This is necessary
  /// for emphasis marks as CommonMark does not permit enclosing
  /// whitespace inside emphasis marks, see:
  /// http://spec.commonmark.org/0.26/#example-330
  expelEnclosingWhitespace?: boolean,
  /// Can be set to `false` to disable character escaping in a mark. A
  /// non-escaping mark has to have the highest precedence (must
  /// always be the innermost mark).
  escape?: boolean
}

/// Fallback spec used for marks without a serializer when the
/// `strict` option is `false`: the mark produces no syntax at all.
const blankMark: MarkSerializerSpec = {open: "", close: "", mixable: true}

/// A specification for serializing a ProseMirror document as
/// Markdown/CommonMark text.
export class MarkdownSerializer {
  /// Construct a serializer with the given configuration. The `nodes`
  /// object should map node names in a given schema to function that
  /// take a serializer state and such a node, and serialize the node.
  constructor(
    /// The node serializer functions for this serializer.
    readonly nodes: {[node: string]: (state: MarkdownSerializerState, node: Node, parent: Node, index: number) => void},
    /// The mark serializer info.
    readonly marks: {[mark: string]: MarkSerializerSpec},
    readonly options: {
      /// Extra characters can be added for escaping. This is passed
      /// directly to String.replace(), and the matching characters are
      /// preceded by a backslash.
      escapeExtraCharacters?: RegExp,
      /// Specify the node name of hard breaks.
      /// Defaults to "hard_break"
      hardBreakNodeName?: string,
      /// By default, the serializer raises an error when it finds a
      /// node or mark type for which no serializer is defined. Set
      /// this to `false` to make it just ignore such elements,
      /// rendering only their content.
      strict?: boolean
    } = {}
  ) {}

  /// Serialize the content of the given node to
  /// [CommonMark](http://commonmark.org/).
  serialize(content: Node, options: {
    /// Whether to render lists in a tight style. This can be overridden
    /// on a node level by specifying a tight attribute on the node.
    /// Defaults to false.
    tightLists?: boolean
  } = {}) {
    // Per-call options take precedence over the serializer-level ones.
    // The merge produces a fresh object, so the state is free to fill
    // in defaults on it.
    options = Object.assign({}, this.options, options)
    let state = new MarkdownSerializerState(this.nodes, this.marks, options)
    state.renderContent(content)
    return state.out
  }
}

/// A serializer for the [basic schema](#schema).
export const defaultMarkdownSerializer = new MarkdownSerializer({
  blockquote(state, node) {
    state.wrapBlock("> ", null, node, () => state.renderContent(node))
  },
  code_block(state, node) {
    // Make sure the fence is longer than any run of three or more
    // backticks inside the code, so the content cannot close it early.
    const backticks = node.textContent.match(/`{3,}/gm)
    const fence = backticks
      ? backticks.reduce((longest, run) => run.length > longest.length ? run : longest) + "`"
      : "```"

    state.write(fence + (node.attrs.params || "") + "\n")
    state.text(node.textContent, false)
    // Add a newline to the current content before adding closing marker
    state.write("\n")
    state.write(fence)
    state.closeBlock(node)
  },
  heading(state, node) {
    state.write(state.repeat("#", node.attrs.level) + " ")
    state.renderInline(node, false)
    state.closeBlock(node)
  },
  horizontal_rule(state, node) {
    state.write(node.attrs.markup || "---")
    state.closeBlock(node)
  },
  bullet_list(state, node) {
    state.renderList(node, "  ", () => (node.attrs.bullet || "*") + " ")
  },
  ordered_list(state, node) {
    let start = node.attrs.order || 1
    // Width of the widest item number, used to right-align the
    // numbers and to indent continuation lines.
    let maxW = String(start + node.childCount - 1).length
    let space = state.repeat(" ", maxW + 2)
    state.renderList(node, space, i => {
      let nStr = String(start + i)
      return state.repeat(" ", maxW - nStr.length) + nStr + ". "
    })
  },
  list_item(state, node) {
    state.renderContent(node)
  },
  paragraph(state, node) {
    state.renderInline(node)
    state.closeBlock(node)
  },

  image(state, node) {
    // `![alt](src "title")`: parentheses in the source and double
    // quotes in the title are backslash-escaped so they cannot
    // terminate the destination or the title early.
    state.write("![" + state.esc(node.attrs.alt || "") + "](" + node.attrs.src.replace(/[\(\)]/g, "\\$&") +
                (node.attrs.title ? ' "' + node.attrs.title.replace(/"/g, '\\"') + '"' : "") + ")")
  },
  hard_break(state, node, parent, index) {
    // Only emit the break when some later sibling is not itself a
    // hard break; trailing hard breaks produce no output.
    for (let i = index + 1; i < parent.childCount; i++)
      if (parent.child(i).type != node.type) {
        state.write("\\\n")
        return
      }
  },
  text(state, node) {
    // Text inside an autolink (`<url>`) is written without escaping.
    state.text(node.text!, !state.inAutolink)
  }
}, {
  em: {open: "*", close: "*", mixable: true, expelEnclosingWhitespace: true},
  strong: {open: "**", close: "**", mixable: true, expelEnclosingWhitespace: true},
  link: {
    open(state, mark, parent, index) {
      state.inAutolink = isPlainURL(mark, parent, index)
      return state.inAutolink ? "<" : "["
    },
    close(state, mark, parent, index) {
      let {inAutolink} = state
      state.inAutolink = undefined
      return inAutolink ? ">"
        : "](" + mark.attrs.href.replace(/[\(\)"]/g, "\\$&") + (mark.attrs.title ? ` "${mark.attrs.title.replace(/"/g, '\\"')}"` : "") + ")"
    },
    mixable: true
  },
  code: {open(_state, _mark, parent, index) { return backticksFor(parent.child(index), -1) },
         close(_state, _mark, parent, index) { return backticksFor(parent.child(index - 1), 1) },
         escape: false}
})

/// Compute the backtick delimiter for an inline code span around
/// `node`. The delimiter is one backtick longer than the longest
/// backtick run in the node's text. When the text contains backticks,
/// a space is added on the inner side of the delimiter: after it for
/// the opening side (`side < 0`), before it for the closing side
/// (`side > 0`).
function backticksFor(node: Node, side: number) {
  let ticks = /`+/g, m, len = 0
  if (node.isText) while (m = ticks.exec(node.text!)) len = Math.max(len, m[0].length)
  let result = len > 0 && side > 0 ? " `" : "`"
  for (let i = 0; i < len; i++) result += "`"
  if (len > 0 && side < 0) result += " "
  return result
}

/// Tell whether the link mark on the child at `index` can be written
/// as an autolink (`<url>`): it has no title, its href starts with a
/// scheme, the child is a text node whose text equals the href with
/// the link as its last mark, and the next sibling (if any) does not
/// carry the same link.
function isPlainURL(link: Mark, parent: Node, index: number) {
  if (link.attrs.title || !/^\w+:/.test(link.attrs.href)) return false
  let content = parent.child(index)
  if (!content.isText || content.text != link.attrs.href || content.marks[content.marks.length - 1] != link) return false
  return index == parent.childCount - 1 || !link.isInSet(parent.child(index + 1).marks)
}

/// This is an object used to track state and expose
/// methods related to markdown serialization. Instances are passed to
/// node and mark serialization methods (see `toMarkdown`).
export class MarkdownSerializerState {
  /// @internal
  /// Prefix written at the start of every output line (block quote
  /// markers, list indentation, and so on).
  delim: string = ""
  /// @internal
  /// The Markdown text produced so far.
  out: string = ""
  /// @internal
  /// The most recently closed block node, if its trailing blank line
  /// has not been flushed yet.
  closed: Node | null = null
  /// @internal
  inAutolink: boolean | undefined = undefined
  /// @internal
  atBlockStart: boolean = false
  /// @internal
  inTightList: boolean = false

  /// @internal
  constructor(
    /// @internal
    readonly nodes: {[node: string]: (state: MarkdownSerializerState, node: Node, parent: Node, index: number) => void},
    /// @internal
    readonly marks: {[mark: string]: MarkSerializerSpec},
    /// The options passed to the serializer.
    readonly options: {tightLists?: boolean, escapeExtraCharacters?: RegExp, hardBreakNodeName?: string, strict?: boolean}
  ) {
    // Fill in defaults directly on the options object that was passed in.
    if (typeof this.options.tightLists == "undefined")
      this.options.tightLists = false
    if (typeof this.options.hardBreakNodeName == "undefined")
      this.options.hardBreakNodeName = "hard_break"
  }

  /// @internal
  /// If a block was closed, terminate its line and add `size - 1`
  /// separator lines (each holding the current delimiter with its
  /// trailing whitespace trimmed), then clear the closed marker.
  flushClose(size: number = 2) {
    if (this.closed) {
      if (!this.atBlank()) this.out += "\n"
      if (size > 1) {
        let delimMin = this.delim
        let trim = /\s+$/.exec(delimMin)
        if (trim) delimMin = delimMin.slice(0, delimMin.length - trim[0].length)
        for (let i = 1; i < size; i++)
          this.out += delimMin + "\n"
      }
      this.closed = null
    }
  }

  /// @internal
  /// Look up the serializer spec for the mark type `name`. Throws
  /// unless the `strict` option is `false`, in which case a blank
  /// spec is returned for unknown marks.
  getMark(name: string) {
    let info = this.marks[name]
    if (!info) {
      if (this.options.strict !== false)
        throw new Error(`Mark type \`${name}\` not supported by Markdown renderer`)
      info = blankMark
    }
    return info
  }

  /// Render a block, prefixing each line with `delim`, and the first
  /// line in `firstDelim`. `node` should be the node that is closed at
  /// the end of the block, and `f` is a function that renders the
  /// content of the block.
  wrapBlock(delim: string, firstDelim: string | null, node: Node, f: () => void) {
    let old = this.delim
    this.write(firstDelim != null ? firstDelim : delim)
    this.delim += delim
    f()
    this.delim = old
    this.closeBlock(node)
  }

  /// @internal
  /// True when the output is empty or ends with a newline.
  atBlank() {
    return /(^|\n)$/.test(this.out)
  }

  /// Ensure the current content ends with a newline.
  ensureNewLine() {
    if (!this.atBlank()) this.out += "\n"
  }

  /// Prepare the state for writing output (closing closed paragraphs,
  /// adding delimiters, and so on), and then optionally add content
  /// (unescaped) to the output.
  write(content?: string) {
    this.flushClose()
    if (this.delim && this.atBlank())
      this.out += this.delim
    if (content) this.out += content
  }

  /// Close the block for the given node.
  closeBlock(node: Node) {
    this.closed = node
  }

  /// Add the given text to the document. When escape is not `false`,
  /// it will be escaped.
  text(text: string, escape = true) {
    let lines = text.split("\n")
    for (let i = 0; i < lines.length; i++) {
      // Flush pending block closes and emit the line delimiter.
      this.write()
      // Escape exclamation marks in front of links
      if (!escape && lines[i][0] == "[" && /(^|[^\\])\!$/.test(this.out))
        this.out = this.out.slice(0, this.out.length - 1) + "\\!"
      this.out += escape ? this.esc(lines[i], this.atBlockStart) : lines[i]
      if (i != lines.length - 1) this.out += "\n"
    }
  }

  /// Render the given node as a block.
  render(node: Node, parent: Node, index: number) {
    if (this.nodes[node.type.name]) {
      this.nodes[node.type.name](this, node, parent, index)
    } else {
      if (this.options.strict !== false) {
        throw new Error("Token type `" + node.type.name + "` not supported by Markdown renderer")
      } else if (!node.type.isLeaf) {
        // Non-strict mode: skip the unknown node itself but still
        // render its content.
        if (node.type.inlineContent) this.renderInline(node)
        else this.renderContent(node)
        if (node.isBlock) this.closeBlock(node)
      }
    }
  }

  /// Render the contents of `parent` as block nodes.
  renderContent(parent: Node) {
    parent.forEach((node, _, i) => this.render(node, parent, i))
  }

  /// Render the contents of `parent` as inline content.
  renderInline(parent: Node, fromBlockStart = true) {
    this.atBlockStart = fromBlockStart
    // `active` holds the marks currently open in the output, and
    // `trailing` the whitespace expelled from the end of the
    // previous node, to be written once its marks are closed.
    let active: Mark[] = [], trailing = ""
    let progress = (node: Node | null, offset: number, index: number) => {
      let marks = node ? node.marks : []

      // Remove marks from `hard_break` that are the last node inside
      // that mark to prevent parser edge cases with new lines just
      // before closing marks.
      if (node && node.type.name === this.options.hardBreakNodeName)
        marks = marks.filter(m => {
          if (index + 1 == parent.childCount) return false
          let next = parent.child(index + 1)
          return m.isInSet(next.marks) && (!next.isText || /\S/.test(next.text!))
        })

      let leading = trailing
      trailing = ""
      // If whitespace has to be expelled from the node, adjust
      // leading and trailing accordingly.
      if (node && node.isText && marks.some(mark => {
        let info = this.getMark(mark.type.name)
        return info && info.expelEnclosingWhitespace && !mark.isInSet(active)
      })) {
        // `[\s\S]` rather than `.` so that text containing line
        // breaks is kept whole instead of being cut at the first one.
        let [, lead, rest] = /^(\s*)([\s\S]*)$/.exec(node.text!)!
        if (lead) {
          leading += lead
          node = rest ? (node as any).withText(rest) : null
          if (!node) marks = active
        }
      }
      if (node && node.isText && marks.some(mark => {
        let info = this.getMark(mark.type.name)
        return info && info.expelEnclosingWhitespace &&
          (index == parent.childCount - 1 || !mark.isInSet(parent.child(index + 1).marks))
      })) {
        // As above: match across line breaks so only the whitespace at
        // the very end of the text is split off.
        let [, rest, trail] = /^([\s\S]*?)(\s*)$/.exec(node.text!)!
        if (trail) {
          trailing = trail
          node = rest ? (node as any).withText(rest) : null
          if (!node) marks = active
        }
      }
      let inner = marks.length ? marks[marks.length - 1] : null
      let noEsc = inner && this.getMark(inner.type.name).escape === false
      // A non-escaping innermost mark is written together with the
      // node's text below, so it is left out of the open/close
      // bookkeeping.
      let len = marks.length - (noEsc ? 1 : 0)

      // Try to reorder 'mixable' marks, such as em and strong, which
      // in Markdown may be opened and closed in different order, so
      // that order of the marks for the token matches the order in
      // active.
      outer: for (let i = 0; i < len; i++) {
        let mark = marks[i]
        if (!this.getMark(mark.type.name).mixable) break
        for (let j = 0; j < active.length; j++) {
          let other = active[j]
          if (!this.getMark(other.type.name).mixable) break
          if (mark.eq(other)) {
            if (i > j)
              marks = marks.slice(0, j).concat(mark).concat(marks.slice(j, i)).concat(marks.slice(i + 1, len))
            else if (j > i)
              marks = marks.slice(0, i).concat(marks.slice(i + 1, j)).concat(mark).concat(marks.slice(j, len))
            continue outer
          }
        }
      }

      // Find the prefix of the mark set that didn't change
      let keep = 0
      while (keep < Math.min(active.length, len) && marks[keep].eq(active[keep])) ++keep

      // Close the marks that need to be closed
      while (keep < active.length)
        this.text(this.markString(active.pop()!, false, parent, index), false)

      // Output any previously expelled trailing whitespace outside the marks
      if (leading) this.text(leading)

      // Open the marks that need to be opened
      if (node) {
        while (active.length < len) {
          let add = marks[active.length]
          active.push(add)
          this.text(this.markString(add, true, parent, index), false)
          this.atBlockStart = false
        }

        // Render the node. Special case code marks, since their content
        // may not be escaped.
        if (noEsc && node.isText)
          this.text(this.markString(inner!, true, parent, index) + node.text +
                    this.markString(inner!, false, parent, index + 1), false)
        else
          this.render(node, parent, index)
        this.atBlockStart = false
      }

      // After the first non-empty text node is rendered, the end of output
      // is no longer at block start.
      //
      // FIXME: If a non-text node writes something to the output for this
      // block, the end of output is also no longer at block start. But how
      // can we detect that?
      if (node?.isText && node.nodeSize > 0) {
        this.atBlockStart = false
      }
    }
    parent.forEach(progress)
    // A final pass without a node closes any marks that are still
    // open and writes out pending trailing whitespace.
    progress(null, 0, parent.childCount)
    this.atBlockStart = false
  }

  /// Render a node's content as a list. `delim` should be the extra
  /// indentation added to all lines except the first in an item,
  /// `firstDelim` is a function going from an item index to a
  /// delimiter for the first line of the item.
  renderList(node: Node, delim: string, firstDelim: (index: number) => string) {
    // Two adjacent lists of the same type get an extra separator
    // line; a list nested in a tight list gets none.
    if (this.closed && this.closed.type == node.type)
      this.flushClose(3)
    else if (this.inTightList)
      this.flushClose(1)

    let isTight = typeof node.attrs.tight != "undefined" ? node.attrs.tight : this.options.tightLists
    let prevTight = this.inTightList
    this.inTightList = isTight
    node.forEach((child, _, i) => {
      if (i && isTight) this.flushClose(1)
      this.wrapBlock(delim, firstDelim(i), node, () => this.render(child, node, i))
    })
    this.inTightList = prevTight
  }

  /// Escape the given string so that it can safely appear in Markdown
  /// content. If `startOfLine` is true, also escape characters that
  /// have special meaning only at the start of the line.
  esc(str: string, startOfLine = false) {
    str = str.replace(
      /[`*\\~\[\]_]/g,
      // An underscore between two word characters is left unescaped.
      (m, i) => m == "_" && i > 0 && i + 1 < str.length && str[i-1].match(/\w/) && str[i+1].match(/\w/) ? m : "\\" + m
    )
    if (startOfLine) str = str.replace(/^(\+[ ]|[\-*>])/, "\\$&").replace(/^(\s*)(#{1,6})(\s|$)/, '$1\\$2$3').replace(/^(\s*\d+)\.\s/, "$1\\. ")
    if (this.options.escapeExtraCharacters) str = str.replace(this.options.escapeExtraCharacters, "\\$&")
    return str
  }

  /// @internal
  /// Wrap `str` in double quotes, or single quotes if it contains a
  /// double quote, or parentheses if it contains both.
  quote(str: string) {
    let wrap = str.indexOf('"') == -1 ? '""' : str.indexOf("'") == -1 ? "''" : "()"
    return wrap[0] + str + wrap[1]
  }

  /// Repeat the given string `n` times.
  repeat(str: string, n: number) {
    let out = ""
    for (let i = 0; i < n; i++) out += str
    return out
  }

  /// Get the markdown string for a given opening or closing mark.
  markString(mark: Mark, open: boolean, parent: Node, index: number) {
    let info = this.getMark(mark.type.name)
    let value = open ? info.open : info.close
    return typeof value == "string" ? value : value(this, mark, parent, index)
  }

  /// Get leading and trailing whitespace from a string. Values of
  /// leading or trailing property of the return object will be undefined
  /// if there is no match.
  getEnclosingWhitespace(text: string): {leading?: string, trailing?: string} {
    return {
      leading: (text.match(/^(\s+)/) || [undefined])[0],
      trailing: (text.match(/(\s+)$/) || [undefined])[0]
    }
  }
}