tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

to_markdown.ts (18073B)


      1 import {Node, Mark} from "prosemirror-model"
      2 
      3 type MarkSerializerSpec = {
      4  /// The string that should appear before a piece of content marked
      5  /// by this mark, either directly or as a function that returns an
      6  /// appropriate string.
      7  open: string | ((state: MarkdownSerializerState, mark: Mark, parent: Node, index: number) => string),
      8  /// The string that should appear after a piece of content marked by
      9  /// this mark.
     10  close: string | ((state: MarkdownSerializerState, mark: Mark, parent: Node, index: number) => string),
     11  /// When `true`, this indicates that the order in which the mark's
     12  /// opening and closing syntax appears relative to other mixable
     13  /// marks can be varied. (For example, you can say `**a *b***` and
     14  /// `*a **b***`, but not `` `a *b*` ``.)
     15  mixable?: boolean,
     16  /// When enabled, causes the serializer to move enclosing whitespace
     17  /// from inside the marks to outside the marks. This is necessary
     18  /// for emphasis marks as CommonMark does not permit enclosing
     19  /// whitespace inside emphasis marks, see:
     20  /// http:///spec.commonmark.org/0.26/#example-330
     21  expelEnclosingWhitespace?: boolean,
     22  /// Can be set to `false` to disable character escaping in a mark. A
     23  /// non-escaping mark has to have the highest precedence (must
     24  /// always be the innermost mark).
     25  escape?: boolean
     26 }
     27 
     28 const blankMark: MarkSerializerSpec = {open: "", close: "", mixable: true}
     29 
     30 /// A specification for serializing a ProseMirror document as
     31 /// Markdown/CommonMark text.
     32 export class MarkdownSerializer {
     33  /// Construct a serializer with the given configuration. The `nodes`
     34  /// object should map node names in a given schema to function that
     35  /// take a serializer state and such a node, and serialize the node.
     36  constructor(
     37    /// The node serializer functions for this serializer.
     38    readonly nodes: {[node: string]: (state: MarkdownSerializerState, node: Node, parent: Node, index: number) => void},
     39    /// The mark serializer info.
     40    readonly marks: {[mark: string]: MarkSerializerSpec},
     41    readonly options: {
     42      /// Extra characters can be added for escaping. This is passed
     43      /// directly to String.replace(), and the matching characters are
     44      /// preceded by a backslash.
     45      escapeExtraCharacters?: RegExp,
     46      /// Specify the node name of hard breaks.
     47      /// Defaults to "hard_break"
     48      hardBreakNodeName?: string,
     49      /// By default, the serializer raises an error when it finds a
     50      /// node or mark type for which no serializer is defined. Set
     51      /// this to `false` to make it just ignore such elements,
     52      /// rendering only their content.
     53      strict?: boolean
     54    } = {}
     55  ) {}
     56 
     57  /// Serialize the content of the given node to
     58  /// [CommonMark](http://commonmark.org/).
     59  serialize(content: Node, options: {
     60    /// Whether to render lists in a tight style. This can be overridden
     61    /// on a node level by specifying a tight attribute on the node.
     62    /// Defaults to false.
     63    tightLists?: boolean
     64  } = {}) {
     65    options = Object.assign({}, this.options, options)
     66    let state = new MarkdownSerializerState(this.nodes, this.marks, options)
     67    state.renderContent(content)
     68    return state.out
     69  }
     70 }
     71 
     72 /// A serializer for the [basic schema](#schema).
     73 export const defaultMarkdownSerializer = new MarkdownSerializer({
     74  blockquote(state, node) {
     75    state.wrapBlock("> ", null, node, () => state.renderContent(node))
     76  },
     77  code_block(state, node) {
     78    // Make sure the front matter fences are longer than any dash sequence within it
     79    const backticks = node.textContent.match(/`{3,}/gm)
     80    const fence = backticks ? (backticks.sort().slice(-1)[0] + "`") : "```"
     81 
     82    state.write(fence + (node.attrs.params || "") + "\n")
     83    state.text(node.textContent, false)
     84    // Add a newline to the current content before adding closing marker
     85    state.write("\n")
     86    state.write(fence)
     87    state.closeBlock(node)
     88  },
     89  heading(state, node) {
     90    state.write(state.repeat("#", node.attrs.level) + " ")
     91    state.renderInline(node, false)
     92    state.closeBlock(node)
     93  },
     94  horizontal_rule(state, node) {
     95    state.write(node.attrs.markup || "---")
     96    state.closeBlock(node)
     97  },
     98  bullet_list(state, node) {
     99    state.renderList(node, "  ", () => (node.attrs.bullet || "*") + " ")
    100  },
    101  ordered_list(state, node) {
    102    let start = node.attrs.order || 1
    103    let maxW = String(start + node.childCount - 1).length
    104    let space = state.repeat(" ", maxW + 2)
    105    state.renderList(node, space, i => {
    106      let nStr = String(start + i)
    107      return state.repeat(" ", maxW - nStr.length) + nStr + ". "
    108    })
    109  },
    110  list_item(state, node) {
    111    state.renderContent(node)
    112  },
    113  paragraph(state, node) {
    114    state.renderInline(node)
    115    state.closeBlock(node)
    116  },
    117 
    118  image(state, node) {
    119    state.write("![" + state.esc(node.attrs.alt || "") + "](" + node.attrs.src.replace(/[\(\)]/g, "\\$&") +
    120                (node.attrs.title ? ' "' + node.attrs.title.replace(/"/g, '\\"') + '"' : "") + ")")
    121  },
    122  hard_break(state, node, parent, index) {
    123    for (let i = index + 1; i < parent.childCount; i++)
    124      if (parent.child(i).type != node.type) {
    125        state.write("\\\n")
    126        return
    127      }
    128  },
    129  text(state, node) {
    130    state.text(node.text!, !state.inAutolink)
    131  }
    132 }, {
    133  em: {open: "*", close: "*", mixable: true, expelEnclosingWhitespace: true},
    134  strong: {open: "**", close: "**", mixable: true, expelEnclosingWhitespace: true},
    135  link: {
    136    open(state, mark, parent, index) {
    137      state.inAutolink = isPlainURL(mark, parent, index)
    138      return state.inAutolink ? "<" : "["
    139    },
    140    close(state, mark, parent, index) {
    141      let {inAutolink} = state
    142      state.inAutolink = undefined
    143      return inAutolink ? ">"
    144        : "](" + mark.attrs.href.replace(/[\(\)"]/g, "\\$&") + (mark.attrs.title ? ` "${mark.attrs.title.replace(/"/g, '\\"')}"` : "") + ")"
    145    },
    146    mixable: true
    147  },
    148  code: {open(_state, _mark, parent, index) { return backticksFor(parent.child(index), -1) },
    149         close(_state, _mark, parent, index) { return backticksFor(parent.child(index - 1), 1) },
    150         escape: false}
    151 })
    152 
    153 function backticksFor(node: Node, side: number) {
    154  let ticks = /`+/g, m, len = 0
    155  if (node.isText) while (m = ticks.exec(node.text!)) len = Math.max(len, m[0].length)
    156  let result = len > 0 && side > 0 ? " `" : "`"
    157  for (let i = 0; i < len; i++) result += "`"
    158  if (len > 0 && side < 0) result += " "
    159  return result
    160 }
    161 
    162 function isPlainURL(link: Mark, parent: Node, index: number) {
    163  if (link.attrs.title || !/^\w+:/.test(link.attrs.href)) return false
    164  let content = parent.child(index)
    165  if (!content.isText || content.text != link.attrs.href || content.marks[content.marks.length - 1] != link) return false
    166  return index == parent.childCount - 1 || !link.isInSet(parent.child(index + 1).marks)
    167 }
    168 
    169 /// This is an object used to track state and expose
    170 /// methods related to markdown serialization. Instances are passed to
    171 /// node and mark serialization methods (see `toMarkdown`).
    172 export class MarkdownSerializerState {
    173  /// @internal
    174  delim: string = ""
    175  /// @internal
    176  out: string = ""
    177  /// @internal
    178  closed: Node | null = null
    179  /// @internal
    180  inAutolink: boolean | undefined = undefined
    181  /// @internal
    182  atBlockStart: boolean = false
    183  /// @internal
    184  inTightList: boolean = false
    185 
    186  /// @internal
    187  constructor(
    188    /// @internal
    189    readonly nodes: {[node: string]: (state: MarkdownSerializerState, node: Node, parent: Node, index: number) => void},
    190    /// @internal
    191    readonly marks: {[mark: string]: MarkSerializerSpec},
    192    /// The options passed to the serializer.
    193    readonly options: {tightLists?: boolean, escapeExtraCharacters?: RegExp, hardBreakNodeName?: string, strict?: boolean}
    194  ) {
    195    if (typeof this.options.tightLists == "undefined")
    196      this.options.tightLists = false
    197    if (typeof this.options.hardBreakNodeName == "undefined")
    198      this.options.hardBreakNodeName = "hard_break"
    199  }
    200 
    201  /// @internal
    202  flushClose(size: number = 2) {
    203    if (this.closed) {
    204      if (!this.atBlank()) this.out += "\n"
    205      if (size > 1) {
    206        let delimMin = this.delim
    207        let trim = /\s+$/.exec(delimMin)
    208        if (trim) delimMin = delimMin.slice(0, delimMin.length - trim[0].length)
    209        for (let i = 1; i < size; i++)
    210          this.out += delimMin + "\n"
    211      }
    212      this.closed = null
    213    }
    214  }
    215 
    216  /// @internal
    217  getMark(name: string) {
    218    let info = this.marks[name]
    219    if (!info) {
    220      if (this.options.strict !== false)
    221        throw new Error(`Mark type \`${name}\` not supported by Markdown renderer`)
    222      info = blankMark
    223    }
    224    return info
    225  }
    226 
    227  /// Render a block, prefixing each line with `delim`, and the first
    228  /// line in `firstDelim`. `node` should be the node that is closed at
    229  /// the end of the block, and `f` is a function that renders the
    230  /// content of the block.
    231  wrapBlock(delim: string, firstDelim: string | null, node: Node, f: () => void) {
    232    let old = this.delim
    233    this.write(firstDelim != null ? firstDelim : delim)
    234    this.delim += delim
    235    f()
    236    this.delim = old
    237    this.closeBlock(node)
    238  }
    239 
    240  /// @internal
    241  atBlank() {
    242    return /(^|\n)$/.test(this.out)
    243  }
    244 
    245  /// Ensure the current content ends with a newline.
    246  ensureNewLine() {
    247    if (!this.atBlank()) this.out += "\n"
    248  }
    249 
    250  /// Prepare the state for writing output (closing closed paragraphs,
    251  /// adding delimiters, and so on), and then optionally add content
    252  /// (unescaped) to the output.
    253  write(content?: string) {
    254    this.flushClose()
    255    if (this.delim && this.atBlank())
    256      this.out += this.delim
    257    if (content) this.out += content
    258  }
    259 
    260  /// Close the block for the given node.
    261  closeBlock(node: Node) {
    262    this.closed = node
    263  }
    264 
    265  /// Add the given text to the document. When escape is not `false`,
    266  /// it will be escaped.
    267  text(text: string, escape = true) {
    268    let lines = text.split("\n")
    269    for (let i = 0; i < lines.length; i++) {
    270      this.write()
    271      // Escape exclamation marks in front of links
    272      if (!escape && lines[i][0] == "[" && /(^|[^\\])\!$/.test(this.out))
    273        this.out = this.out.slice(0, this.out.length - 1) + "\\!"
    274      this.out += escape ? this.esc(lines[i], this.atBlockStart) : lines[i]
    275      if (i != lines.length - 1) this.out += "\n"
    276    }
    277  }
    278 
    279  /// Render the given node as a block.
    280  render(node: Node, parent: Node, index: number) {
    281    if (this.nodes[node.type.name]) {
    282      this.nodes[node.type.name](this, node, parent, index)
    283    } else {
    284      if (this.options.strict !== false) {
    285        throw new Error("Token type `" + node.type.name + "` not supported by Markdown renderer")
    286      } else if (!node.type.isLeaf) {
    287        if (node.type.inlineContent) this.renderInline(node)
    288        else this.renderContent(node)
    289        if (node.isBlock) this.closeBlock(node)
    290      }
    291    }
    292  }
    293 
    294  /// Render the contents of `parent` as block nodes.
    295  renderContent(parent: Node) {
    296    parent.forEach((node, _, i) => this.render(node, parent, i))
    297  }
    298 
    299  /// Render the contents of `parent` as inline content.
    300  renderInline(parent: Node, fromBlockStart = true) {
    301    this.atBlockStart = fromBlockStart
    302    let active: Mark[] = [], trailing = ""
    303    let progress = (node: Node | null, offset: number, index: number) => {
    304      let marks = node ? node.marks : []
    305 
    306      // Remove marks from `hard_break` that are the last node inside
    307      // that mark to prevent parser edge cases with new lines just
    308      // before closing marks.
    309      if (node && node.type.name === this.options.hardBreakNodeName)
    310        marks = marks.filter(m => {
    311          if (index + 1 == parent.childCount) return false
    312          let next = parent.child(index + 1)
    313          return m.isInSet(next.marks) && (!next.isText || /\S/.test(next.text!))
    314        })
    315 
    316      let leading = trailing
    317      trailing = ""
    318      // If whitespace has to be expelled from the node, adjust
    319      // leading and trailing accordingly.
    320      if (node && node.isText && marks.some(mark => {
    321        let info = this.getMark(mark.type.name)
    322        return info && info.expelEnclosingWhitespace && !mark.isInSet(active)
    323      })) {
    324        let [_, lead, rest] = /^(\s*)(.*)$/m.exec(node.text!)!
    325        if (lead) {
    326          leading += lead
    327          node = rest ? (node as any).withText(rest) : null
    328          if (!node) marks = active
    329        }
    330      }
    331      if (node && node.isText && marks.some(mark => {
    332        let info = this.getMark(mark.type.name)
    333        return info && info.expelEnclosingWhitespace &&
    334          (index == parent.childCount - 1 || !mark.isInSet(parent.child(index + 1).marks))
    335      })) {
    336        let [_, rest, trail] = /^(.*?)(\s*)$/m.exec(node.text!)!
    337        if (trail) {
    338          trailing = trail
    339          node = rest ? (node as any).withText(rest) : null
    340          if (!node) marks = active
    341        }
    342      }
    343      let inner = marks.length ? marks[marks.length - 1] : null
    344      let noEsc = inner && this.getMark(inner.type.name).escape === false
    345      let len = marks.length - (noEsc ? 1 : 0)
    346 
    347      // Try to reorder 'mixable' marks, such as em and strong, which
    348      // in Markdown may be opened and closed in different order, so
    349      // that order of the marks for the token matches the order in
    350      // active.
    351      outer: for (let i = 0; i < len; i++) {
    352        let mark = marks[i]
    353        if (!this.getMark(mark.type.name).mixable) break
    354        for (let j = 0; j < active.length; j++) {
    355          let other = active[j]
    356          if (!this.getMark(other.type.name).mixable) break
    357          if (mark.eq(other)) {
    358            if (i > j)
    359              marks = marks.slice(0, j).concat(mark).concat(marks.slice(j, i)).concat(marks.slice(i + 1, len))
    360            else if (j > i)
    361              marks = marks.slice(0, i).concat(marks.slice(i + 1, j)).concat(mark).concat(marks.slice(j, len))
    362            continue outer
    363          }
    364        }
    365      }
    366 
    367      // Find the prefix of the mark set that didn't change
    368      let keep = 0
    369      while (keep < Math.min(active.length, len) && marks[keep].eq(active[keep])) ++keep
    370 
    371      // Close the marks that need to be closed
    372      while (keep < active.length)
    373        this.text(this.markString(active.pop()!, false, parent, index), false)
    374 
    375      // Output any previously expelled trailing whitespace outside the marks
    376      if (leading) this.text(leading)
    377 
    378      // Open the marks that need to be opened
    379      if (node) {
    380        while (active.length < len) {
    381          let add = marks[active.length]
    382          active.push(add)
    383          this.text(this.markString(add, true, parent, index), false)
    384          this.atBlockStart = false
    385        }
    386 
    387        // Render the node. Special case code marks, since their content
    388        // may not be escaped.
    389        if (noEsc && node.isText)
    390          this.text(this.markString(inner!, true, parent, index) + node.text +
    391                    this.markString(inner!, false, parent, index + 1), false)
    392        else
    393          this.render(node, parent, index)
    394        this.atBlockStart = false
    395      }
    396 
    397      // After the first non-empty text node is rendered, the end of output
    398      // is no longer at block start.
    399      //
    400      // FIXME: If a non-text node writes something to the output for this
    401      // block, the end of output is also no longer at block start. But how
    402      // can we detect that?
    403      if (node?.isText && node.nodeSize > 0) {
    404        this.atBlockStart = false
    405      }
    406    }
    407    parent.forEach(progress)
    408    progress(null, 0, parent.childCount)
    409    this.atBlockStart = false
    410  }
    411 
    412  /// Render a node's content as a list. `delim` should be the extra
    413  /// indentation added to all lines except the first in an item,
    414  /// `firstDelim` is a function going from an item index to a
    415  /// delimiter for the first line of the item.
    416  renderList(node: Node, delim: string, firstDelim: (index: number) => string) {
    417    if (this.closed && this.closed.type == node.type)
    418      this.flushClose(3)
    419    else if (this.inTightList)
    420      this.flushClose(1)
    421 
    422    let isTight = typeof node.attrs.tight != "undefined" ? node.attrs.tight : this.options.tightLists
    423    let prevTight = this.inTightList
    424    this.inTightList = isTight
    425    node.forEach((child, _, i) => {
    426      if (i && isTight) this.flushClose(1)
    427      this.wrapBlock(delim, firstDelim(i), node, () => this.render(child, node, i))
    428    })
    429    this.inTightList = prevTight
    430  }
    431 
    432  /// Escape the given string so that it can safely appear in Markdown
    433  /// content. If `startOfLine` is true, also escape characters that
    434  /// have special meaning only at the start of the line.
    435  esc(str: string, startOfLine = false) {
    436    str = str.replace(
    437      /[`*\\~\[\]_]/g,
    438      (m, i) => m == "_" && i > 0 && i + 1 < str.length && str[i-1].match(/\w/) && str[i+1].match(/\w/) ?  m : "\\" + m
    439    )
    440    if (startOfLine) str = str.replace(/^(\+[ ]|[\-*>])/, "\\$&").replace(/^(\s*)(#{1,6})(\s|$)/, '$1\\$2$3').replace(/^(\s*\d+)\.\s/, "$1\\. ")
    441    if (this.options.escapeExtraCharacters) str = str.replace(this.options.escapeExtraCharacters, "\\$&")
    442    return str
    443  }
    444 
    445  /// @internal
    446  quote(str: string) {
    447    let wrap = str.indexOf('"') == -1 ? '""' : str.indexOf("'") == -1 ? "''" : "()"
    448    return wrap[0] + str + wrap[1]
    449  }
    450 
    451  /// Repeat the given string `n` times.
    452  repeat(str: string, n: number) {
    453    let out = ""
    454    for (let i = 0; i < n; i++) out += str
    455    return out
    456  }
    457 
    458  /// Get the markdown string for a given opening or closing mark.
    459  markString(mark: Mark, open: boolean, parent: Node, index: number) {
    460    let info = this.getMark(mark.type.name)
    461    let value = open ? info.open : info.close
    462    return typeof value == "string" ? value : value(this, mark, parent, index)
    463  }
    464 
    465  /// Get leading and trailing whitespace from a string. Values of
    466  /// leading or trailing property of the return object will be undefined
    467  /// if there is no match.
    468  getEnclosingWhitespace(text: string): {leading?: string, trailing?: string} {
    469    return {
    470      leading: (text.match(/^(\s+)/) || [undefined])[0],
    471      trailing: (text.match(/(\s+)$/) || [undefined])[0]
    472    }
    473  }
    474 }