dkforest

A forum and chat platform (onion)
git clone https://git.dasho.dev/n0tr1v/dkforest.git
Log | Files | Refs | LICENSE

markdown.go (26519B)


      1 // Blackfriday Markdown Processor
      2 // Available at http://github.com/russross/blackfriday
      3 //
      4 // Copyright © 2011 Russ Ross <russ@russross.com>.
      5 // Distributed under the Simplified BSD License.
      6 // See README.md for details.
      7 
      8 package blackfriday
      9 
     10 import (
     11 	"bytes"
     12 	"fmt"
     13 	"io"
     14 	"strings"
     15 	"unicode/utf8"
     16 )
     17 
     18 //
     19 // Markdown parsing and processing
     20 //
     21 
// Version is the version string of the package. It appears in the rendered
// document when the CompletePage flag is on.
const Version = "2.0"
     25 
     26 // Extensions is a bitwise or'ed collection of enabled Blackfriday's
     27 // extensions.
     28 type Extensions int
     29 
     30 // These are the supported markdown parsing extensions.
     31 // OR these values together to select multiple extensions.
     32 const (
     33 	NoExtensions           Extensions = 0
     34 	NoIntraEmphasis        Extensions = 1 << iota // Ignore emphasis markers inside words
     35 	Tables                                        // Render tables
     36 	FencedCode                                    // Render fenced code blocks
     37 	Autolink                                      // Detect embedded URLs that are not explicitly marked
     38 	Strikethrough                                 // Strikethrough text using ~~test~~
     39 	LaxHTMLBlocks                                 // Loosen up HTML block parsing rules
     40 	SpaceHeadings                                 // Be strict about prefix heading rules
     41 	HardLineBreak                                 // Translate newlines into line breaks
     42 	TabSizeEight                                  // Expand tabs to eight spaces instead of four
     43 	Footnotes                                     // Pandoc-style footnotes
     44 	NoEmptyLineBeforeBlock                        // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
     45 	HeadingIDs                                    // specify heading IDs  with {#id}
     46 	Titleblock                                    // Titleblock ala pandoc
     47 	AutoHeadingIDs                                // Create the heading ID from the text
     48 	BackslashLineBreak                            // Translate trailing backslashes into line breaks
     49 	DefinitionLists                               // Render definition lists
     50 	NoLink                                        // No links/images
     51 	ManualLineBreak                               //
     52 
     53 	CommonHTMLFlags HTMLFlags = UseXHTML | Smartypants |
     54 		SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes
     55 
     56 	CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
     57 		Autolink | Strikethrough | SpaceHeadings | HeadingIDs |
     58 		BackslashLineBreak | DefinitionLists
     59 )
     60 
     61 // ListType contains bitwise or'ed flags for list and list item objects.
     62 type ListType int
     63 
     64 // These are the possible flag values for the ListItem renderer.
     65 // Multiple flag values may be ORed together.
     66 // These are mostly of interest if you are writing a new output format.
     67 const (
     68 	ListTypeOrdered ListType = 1 << iota
     69 	ListTypeDefinition
     70 	ListTypeTerm
     71 
     72 	ListItemContainsBlock
     73 	ListItemBeginningOfList // TODO: figure out if this is of any use now
     74 	ListItemEndOfList
     75 )
     76 
     77 // CellAlignFlags holds a type of alignment in a table cell.
     78 type CellAlignFlags int
     79 
     80 // These are the possible flag values for the table cell renderer.
     81 // Only a single one of these values will be used; they are not ORed together.
     82 // These are mostly of interest if you are writing a new output format.
     83 const (
     84 	TableAlignmentLeft CellAlignFlags = 1 << iota
     85 	TableAlignmentRight
     86 	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
     87 )
     88 
     89 // The size of a tab stop.
     90 const (
     91 	TabSizeDefault = 4
     92 	TabSizeDouble  = 8
     93 )
     94 
     95 // blockTags is a set of tags that are recognized as HTML block tags.
     96 // Any of these can be included in markdown text without special escaping.
     97 var blockTags = map[string]struct{}{
     98 	"blockquote": {},
     99 	"del":        {},
    100 	"div":        {},
    101 	"dl":         {},
    102 	"fieldset":   {},
    103 	"form":       {},
    104 	"h1":         {},
    105 	"h2":         {},
    106 	"h3":         {},
    107 	"h4":         {},
    108 	"h5":         {},
    109 	"h6":         {},
    110 	"iframe":     {},
    111 	"ins":        {},
    112 	"math":       {},
    113 	"noscript":   {},
    114 	"ol":         {},
    115 	"pre":        {},
    116 	"p":          {},
    117 	"script":     {},
    118 	"style":      {},
    119 	"table":      {},
    120 	"ul":         {},
    121 
    122 	// HTML5
    123 	"address":    {},
    124 	"article":    {},
    125 	"aside":      {},
    126 	"canvas":     {},
    127 	"figcaption": {},
    128 	"figure":     {},
    129 	"footer":     {},
    130 	"header":     {},
    131 	"hgroup":     {},
    132 	"main":       {},
    133 	"nav":        {},
    134 	"output":     {},
    135 	"progress":   {},
    136 	"section":    {},
    137 	"video":      {},
    138 }
    139 
    140 // Renderer is the rendering interface. This is mostly of interest if you are
    141 // implementing a new rendering format.
    142 //
    143 // Only an HTML implementation is provided in this repository, see the README
    144 // for external implementations.
    145 type Renderer interface {
    146 	// RenderNode is the main rendering method. It will be called once for
    147 	// every leaf node and twice for every non-leaf node (first with
    148 	// entering=true, then with entering=false). The method should write its
    149 	// rendition of the node to the supplied writer w.
    150 	RenderNode(w io.Writer, node *Node, entering bool) WalkStatus
    151 
    152 	// RenderHeader is a method that allows the renderer to produce some
    153 	// content preceding the main body of the output document. The header is
    154 	// understood in the broad sense here. For example, the default HTML
    155 	// renderer will write not only the HTML document preamble, but also the
    156 	// table of contents if it was requested.
    157 	//
    158 	// The method will be passed an entire document tree, in case a particular
    159 	// implementation needs to inspect it to produce output.
    160 	//
    161 	// The output should be written to the supplied writer w. If your
    162 	// implementation has no header to write, supply an empty implementation.
    163 	RenderHeader(w io.Writer, ast *Node)
    164 
    165 	// RenderFooter is a symmetric counterpart of RenderHeader.
    166 	RenderFooter(w io.Writer, ast *Node)
    167 }
    168 
// inlineParser is the signature of the callbacks used for inline parsing.
// One such function is registered for each character that triggers a
// response when parsing inline data (see the inlineCallback table filled in
// by New).
type inlineParser func(p *Markdown, data []byte, offset int) (int, *Node)
    172 
// Markdown is a type that holds extensions and the runtime state used by
// Parse, and the renderer. You can not use it directly, construct it with New.
type Markdown struct {
	// renderer is the output backend; Run calls its RenderHeader, RenderNode
	// and RenderFooter methods.
	renderer          Renderer
	// referenceOverride, when non-nil, is consulted by getRef before refs.
	referenceOverride ReferenceOverrideFunc
	// refs maps lower-cased reference ids to their definitions.
	refs              map[string]*reference
	// inlineCallback is indexed by the trigger byte; New fills it in.
	inlineCallback    [256]inlineParser
	// extensions is the set of enabled parsing extensions.
	extensions        Extensions
	// nesting and maxNesting are not used in this part of the file; New sets
	// maxNesting to 16. NOTE(review): presumably a recursion depth guard for
	// inline parsing, confirm at the use site.
	nesting           int
	maxNesting        int
	// insideLink starts out false. NOTE(review): presumably set while
	// parsing link text, confirm at the use site.
	insideLink        bool

	// Footnotes need to be ordered as well as available to quickly check for
	// presence. If a ref is also a footnote, it's stored both in refs and here
	// in notes. Slice is nil if footnotes not enabled.
	notes []*reference

	// doc is the root Document node of the tree being built.
	doc                  *Node
	// tip is the node new children are appended to; finalize moves it to the
	// parent of the block being closed.
	tip                  *Node // = doc
	// oldTip, lastMatchedContainer and allClosed drive closeUnmatchedBlocks,
	// which finalizes every node from oldTip up to lastMatchedContainer.
	oldTip               *Node
	lastMatchedContainer *Node // = doc
	allClosed            bool
}
    196 
    197 func (p *Markdown) getRef(refid string) (ref *reference, found bool) {
    198 	if p.referenceOverride != nil {
    199 		r, overridden := p.referenceOverride(refid)
    200 		if overridden {
    201 			if r == nil {
    202 				return nil, false
    203 			}
    204 			return &reference{
    205 				link:     []byte(r.Link),
    206 				title:    []byte(r.Title),
    207 				noteID:   0,
    208 				hasBlock: false,
    209 				text:     []byte(r.Text)}, true
    210 		}
    211 	}
    212 	// refs are case insensitive
    213 	ref, found = p.refs[strings.ToLower(refid)]
    214 	return ref, found
    215 }
    216 
    217 func (p *Markdown) finalize(block *Node) {
    218 	above := block.Parent
    219 	block.open = false
    220 	p.tip = above
    221 }
    222 
    223 func (p *Markdown) addChild(node NodeType, offset uint32) *Node {
    224 	return p.addExistingChild(NewNode(node), offset)
    225 }
    226 
    227 func (p *Markdown) addExistingChild(node *Node, offset uint32) *Node {
    228 	for !p.tip.canContain(node.Type) {
    229 		p.finalize(p.tip)
    230 	}
    231 	p.tip.AppendChild(node)
    232 	p.tip = node
    233 	return node
    234 }
    235 
    236 func (p *Markdown) closeUnmatchedBlocks() {
    237 	if !p.allClosed {
    238 		for p.oldTip != p.lastMatchedContainer {
    239 			parent := p.oldTip.Parent
    240 			p.finalize(p.oldTip)
    241 			p.oldTip = parent
    242 		}
    243 		p.allClosed = true
    244 	}
    245 }
    246 
    247 //
    248 //
    249 // Public interface
    250 //
    251 //
    252 
// Reference represents the details of a link.
// It is the value a ReferenceOverrideFunc returns; see WithRefOverride for
// the use-case.
type Reference struct {
	// Link is usually the URL the reference points to.
	Link string
	// Title is the alternate text describing the link in more detail.
	Title string
	// Text is the optional text to override the ref with if the syntax used was
	// [refid][]
	Text string
}
    264 
// ReferenceOverrideFunc is expected to be called with a reference string and
// return either a valid Reference type that the reference string maps to or
// nil. If overridden is false, the default reference logic will be executed.
// If overridden is true and ref is nil, the reference is reported as not
// found. See WithRefOverride for more details on the use-case.
type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)
    270 
    271 // New constructs a Markdown processor. You can use the same With* functions as
    272 // for Run() to customize parser's behavior and the renderer.
    273 func New(opts ...Option) *Markdown {
    274 	var p Markdown
    275 	for _, opt := range opts {
    276 		opt(&p)
    277 	}
    278 	p.refs = make(map[string]*reference)
    279 	p.maxNesting = 16
    280 	p.insideLink = false
    281 	docNode := NewNode(Document)
    282 	p.doc = docNode
    283 	p.tip = docNode
    284 	p.oldTip = docNode
    285 	p.lastMatchedContainer = docNode
    286 	p.allClosed = true
    287 	// register inline parsers
    288 	p.inlineCallback[' '] = maybeLineBreak
    289 	p.inlineCallback['#'] = censored
    290 	p.inlineCallback['*'] = emphasis
    291 	//p.inlineCallback['_'] = emphasis
    292 	if p.extensions&Strikethrough != 0 {
    293 		p.inlineCallback['~'] = emphasis
    294 	}
    295 	p.inlineCallback['`'] = codeSpan
    296 	p.inlineCallback['\n'] = lineBreak
    297 	if p.extensions&NoLink == 0 {
    298 		p.inlineCallback['['] = link
    299 	}
    300 	p.inlineCallback['<'] = leftAngle
    301 	p.inlineCallback['\\'] = escape
    302 	p.inlineCallback['&'] = entity
    303 	p.inlineCallback['!'] = maybeImage
    304 	p.inlineCallback['^'] = maybeInlineFootnote
    305 	if p.extensions&Autolink != 0 {
    306 		p.inlineCallback['h'] = maybeAutoLink
    307 		p.inlineCallback['m'] = maybeAutoLink
    308 		p.inlineCallback['f'] = maybeAutoLink
    309 		p.inlineCallback['H'] = maybeAutoLink
    310 		p.inlineCallback['M'] = maybeAutoLink
    311 		p.inlineCallback['F'] = maybeAutoLink
    312 	}
    313 	if p.extensions&Footnotes != 0 {
    314 		p.notes = make([]*reference, 0)
    315 	}
    316 	return &p
    317 }
    318 
// Option customizes the Markdown processor's default behavior. New applies
// the options it is given in order, so a later option overrides an earlier
// one that sets the same field.
type Option func(*Markdown)
    321 
    322 // WithRenderer allows you to override the default renderer.
    323 func WithRenderer(r Renderer) Option {
    324 	return func(p *Markdown) {
    325 		p.renderer = r
    326 	}
    327 }
    328 
    329 // WithExtensions allows you to pick some of the many extensions provided by
    330 // Blackfriday. You can bitwise OR them.
    331 func WithExtensions(e Extensions) Option {
    332 	return func(p *Markdown) {
    333 		p.extensions = e
    334 	}
    335 }
    336 
    337 // WithNoExtensions turns off all extensions and custom behavior.
    338 func WithNoExtensions() Option {
    339 	return func(p *Markdown) {
    340 		p.extensions = NoExtensions
    341 		p.renderer = NewHTMLRenderer(HTMLRendererParameters{
    342 			Flags: HTMLFlagsNone,
    343 		})
    344 	}
    345 }
    346 
    347 // WithRefOverride sets an optional function callback that is called every
    348 // time a reference is resolved.
    349 //
    350 // In Markdown, the link reference syntax can be made to resolve a link to
    351 // a reference instead of an inline URL, in one of the following ways:
    352 //
    353 //  * [link text][refid]
    354 //  * [refid][]
    355 //
    356 // Usually, the refid is defined at the bottom of the Markdown document. If
    357 // this override function is provided, the refid is passed to the override
    358 // function first, before consulting the defined refids at the bottom. If
    359 // the override function indicates an override did not occur, the refids at
    360 // the bottom will be used to fill in the link details.
    361 func WithRefOverride(o ReferenceOverrideFunc) Option {
    362 	return func(p *Markdown) {
    363 		p.referenceOverride = o
    364 	}
    365 }
    366 
    367 // Run is the main entry point to Blackfriday. It parses and renders a
    368 // block of markdown-encoded text.
    369 //
    370 // The simplest invocation of Run takes one argument, input:
    371 //     output := Run(input)
    372 // This will parse the input with CommonExtensions enabled and render it with
    373 // the default HTMLRenderer (with CommonHTMLFlags).
    374 //
    375 // Variadic arguments opts can customize the default behavior. Since Markdown
    376 // type does not contain exported fields, you can not use it directly. Instead,
    377 // use the With* functions. For example, this will call the most basic
    378 // functionality, with no extensions:
    379 //     output := Run(input, WithNoExtensions())
    380 //
    381 // You can use any number of With* arguments, even contradicting ones. They
    382 // will be applied in order of appearance and the latter will override the
    383 // former:
    384 //     output := Run(input, WithNoExtensions(), WithExtensions(exts),
    385 //         WithRenderer(yourRenderer))
    386 func Run(input []byte, opts ...Option) []byte {
    387 	r := NewHTMLRenderer(HTMLRendererParameters{
    388 		Flags: CommonHTMLFlags,
    389 	})
    390 	optList := []Option{WithRenderer(r), WithExtensions(CommonExtensions)}
    391 	optList = append(optList, opts...)
    392 	parser := New(optList...)
    393 	ast := parser.Parse(input)
    394 	var buf bytes.Buffer
    395 	parser.renderer.RenderHeader(&buf, ast)
    396 	ast.Walk(func(node *Node, entering bool) WalkStatus {
    397 		return parser.renderer.RenderNode(&buf, node, entering)
    398 	})
    399 	parser.renderer.RenderFooter(&buf, ast)
    400 	return buf.Bytes()
    401 }
    402 
    403 // Parse is an entry point to the parsing part of Blackfriday. It takes an
    404 // input markdown document and produces a syntax tree for its contents. This
    405 // tree can then be rendered with a default or custom renderer, or
    406 // analyzed/transformed by the caller to whatever non-standard needs they have.
    407 // The return value is the root node of the syntax tree.
    408 func (p *Markdown) Parse(input []byte) *Node {
    409 	p.block(input)
    410 	// Walk the tree and finish up some of unfinished blocks
    411 	for p.tip != nil {
    412 		p.finalize(p.tip)
    413 	}
    414 	// Walk the tree again and process inline markdown in each block
    415 	p.doc.Walk(func(node *Node, entering bool) WalkStatus {
    416 		if node.Type == Paragraph || node.Type == Heading || node.Type == TableCell {
    417 			p.inline(node, node.content)
    418 			node.content = nil
    419 		}
    420 		return GoToNext
    421 	})
    422 	p.parseRefsToAST()
    423 	return p.doc
    424 }
    425 
    426 func (p *Markdown) parseRefsToAST() {
    427 	if p.extensions&Footnotes == 0 || len(p.notes) == 0 {
    428 		return
    429 	}
    430 	p.tip = p.doc
    431 	block := p.addBlock(List, nil)
    432 	block.IsFootnotesList = true
    433 	block.ListFlags = ListTypeOrdered
    434 	flags := ListItemBeginningOfList
    435 	// Note: this loop is intentionally explicit, not range-form. This is
    436 	// because the body of the loop will append nested footnotes to p.notes and
    437 	// we need to process those late additions. Range form would only walk over
    438 	// the fixed initial set.
    439 	for i := 0; i < len(p.notes); i++ {
    440 		ref := p.notes[i]
    441 		p.addExistingChild(ref.footnote, 0)
    442 		block := ref.footnote
    443 		block.ListFlags = flags | ListTypeOrdered
    444 		block.RefLink = ref.link
    445 		if ref.hasBlock {
    446 			flags |= ListItemContainsBlock
    447 			p.block(ref.title)
    448 		} else {
    449 			p.inline(block, ref.title)
    450 		}
    451 		flags &^= ListItemBeginningOfList | ListItemContainsBlock
    452 	}
    453 	above := block.Parent
    454 	finalizeList(block)
    455 	p.tip = above
    456 	block.Walk(func(node *Node, entering bool) WalkStatus {
    457 		if node.Type == Paragraph || node.Type == Heading {
    458 			p.inline(node, node.content)
    459 			node.content = nil
    460 		}
    461 		return GoToNext
    462 	})
    463 }
    464 
    465 //
    466 // Link references
    467 //
    468 // This section implements support for references that (usually) appear
    469 // as footnotes in a document, and can be referenced anywhere in the document.
    470 // The basic format is:
    471 //
    472 //    [1]: http://www.google.com/ "Google"
    473 //    [2]: http://www.github.com/ "Github"
    474 //
    475 // Anywhere in the document, the reference can be linked by referring to its
    476 // label, i.e., 1 and 2 in this example, as in:
    477 //
    478 //    This library is hosted on [Github][2], a git hosting site.
    479 //
    480 // Actual footnotes as specified in Pandoc and supported by some other Markdown
    481 // libraries such as php-markdown are also taken care of. They look like this:
    482 //
    483 //    This sentence needs a bit of further explanation.[^note]
    484 //
    485 //    [^note]: This is the explanation.
    486 //
    487 // Footnotes should be placed at the end of the document in an ordered list.
    488 // Finally, there are inline footnotes such as:
    489 //
    490 //    Inline footnotes^[Also supported.] provide a quick inline explanation,
    491 //    but are rendered at the bottom of the document.
    492 //
    493 
// reference holds all information necessary for a reference-style link or
// a footnote.
//
// Consider this markdown with reference-style links:
//
//     [link][ref]
//
//     [ref]: /url/ "tooltip title"
//
// It will be ultimately converted to this HTML:
//
//     <p><a href="/url/" title="tooltip title">link</a></p>
//
// And a reference structure will be populated as follows:
//
//     p.refs["ref"] = &reference{
//         link: "/url/",
//         title: "tooltip title",
//     }
//
// Alternatively, reference can contain information about a footnote. Consider
// this markdown:
//
//     Text needing a footnote.[^a]
//
//     [^a]: This is the note
//
// A reference structure will be populated as follows:
//
//     p.refs["a"] = &reference{
//         link: "a",
//         title: "This is the note",
//         noteID: <some positive int>,
//     }
//
// TODO: As you can see, it begs for splitting into two dedicated structures
// for refs and for footnotes.
type reference struct {
	// link is the link target; for a footnote it holds the footnote id.
	link     []byte
	// title is the link title; for a footnote it holds the footnote text.
	title    []byte
	noteID   int // 0 if not a footnote ref
	// hasBlock reports that a footnote body spans indented follow-up lines
	// and is parsed as block content rather than inline content.
	hasBlock bool
	footnote *Node // a link to the Item node within a list of footnotes

	text []byte // only gets populated by refOverride feature with Reference.Text
}
    540 
    541 func (r *reference) String() string {
    542 	return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}",
    543 		r.link, r.title, r.text, r.noteID, r.hasBlock)
    544 }
    545 
    546 // Check whether or not data starts with a reference link.
    547 // If so, it is parsed and stored in the list of references
    548 // (in the render struct).
    549 // Returns the number of bytes to skip to move past it,
    550 // or zero if the first line is not a reference.
    551 func isReference(p *Markdown, data []byte, tabSize int) int {
    552 	// up to 3 optional leading spaces
    553 	if len(data) < 4 {
    554 		return 0
    555 	}
    556 	i := 0
    557 	for i < 3 && data[i] == ' ' {
    558 		i++
    559 	}
    560 
    561 	noteID := 0
    562 
    563 	// id part: anything but a newline between brackets
    564 	if data[i] != '[' {
    565 		return 0
    566 	}
    567 	i++
    568 	if p.extensions&Footnotes != 0 {
    569 		if i < len(data) && data[i] == '^' {
    570 			// we can set it to anything here because the proper noteIds will
    571 			// be assigned later during the second pass. It just has to be != 0
    572 			noteID = 1
    573 			i++
    574 		}
    575 	}
    576 	idOffset := i
    577 	for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
    578 		i++
    579 	}
    580 	if i >= len(data) || data[i] != ']' {
    581 		return 0
    582 	}
    583 	idEnd := i
    584 	// footnotes can have empty ID, like this: [^], but a reference can not be
    585 	// empty like this: []. Break early if it's not a footnote and there's no ID
    586 	if noteID == 0 && idOffset == idEnd {
    587 		return 0
    588 	}
    589 	// spacer: colon (space | tab)* newline? (space | tab)*
    590 	i++
    591 	if i >= len(data) || data[i] != ':' {
    592 		return 0
    593 	}
    594 	i++
    595 	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
    596 		i++
    597 	}
    598 	if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
    599 		i++
    600 		if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
    601 			i++
    602 		}
    603 	}
    604 	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
    605 		i++
    606 	}
    607 	if i >= len(data) {
    608 		return 0
    609 	}
    610 
    611 	var (
    612 		linkOffset, linkEnd   int
    613 		titleOffset, titleEnd int
    614 		lineEnd               int
    615 		raw                   []byte
    616 		hasBlock              bool
    617 	)
    618 
    619 	if p.extensions&Footnotes != 0 && noteID != 0 {
    620 		linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
    621 		lineEnd = linkEnd
    622 	} else {
    623 		linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
    624 	}
    625 	if lineEnd == 0 {
    626 		return 0
    627 	}
    628 
    629 	// a valid ref has been found
    630 
    631 	ref := &reference{
    632 		noteID:   noteID,
    633 		hasBlock: hasBlock,
    634 	}
    635 
    636 	if noteID > 0 {
    637 		// reusing the link field for the id since footnotes don't have links
    638 		ref.link = data[idOffset:idEnd]
    639 		// if footnote, it's not really a title, it's the contained text
    640 		ref.title = raw
    641 	} else {
    642 		ref.link = data[linkOffset:linkEnd]
    643 		ref.title = data[titleOffset:titleEnd]
    644 	}
    645 
    646 	// id matches are case-insensitive
    647 	id := string(bytes.ToLower(data[idOffset:idEnd]))
    648 
    649 	p.refs[id] = ref
    650 
    651 	return lineEnd
    652 }
    653 
    654 func scanLinkRef(p *Markdown, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
    655 	// link: whitespace-free sequence, optionally between angle brackets
    656 	if data[i] == '<' {
    657 		i++
    658 	}
    659 	linkOffset = i
    660 	for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
    661 		i++
    662 	}
    663 	linkEnd = i
    664 	if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
    665 		linkOffset++
    666 		linkEnd--
    667 	}
    668 
    669 	// optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
    670 	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
    671 		i++
    672 	}
    673 	if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
    674 		return
    675 	}
    676 
    677 	// compute end-of-line
    678 	if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
    679 		lineEnd = i
    680 	}
    681 	if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
    682 		lineEnd++
    683 	}
    684 
    685 	// optional (space|tab)* spacer after a newline
    686 	if lineEnd > 0 {
    687 		i = lineEnd + 1
    688 		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
    689 			i++
    690 		}
    691 	}
    692 
    693 	// optional title: any non-newline sequence enclosed in '"() alone on its line
    694 	if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
    695 		i++
    696 		titleOffset = i
    697 
    698 		// look for EOL
    699 		for i < len(data) && data[i] != '\n' && data[i] != '\r' {
    700 			i++
    701 		}
    702 		if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
    703 			titleEnd = i + 1
    704 		} else {
    705 			titleEnd = i
    706 		}
    707 
    708 		// step back
    709 		i--
    710 		for i > titleOffset && (data[i] == ' ' || data[i] == '\t') {
    711 			i--
    712 		}
    713 		if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
    714 			lineEnd = titleEnd
    715 			titleEnd = i
    716 		}
    717 	}
    718 
    719 	return
    720 }
    721 
    722 // The first bit of this logic is the same as Parser.listItem, but the rest
    723 // is much simpler. This function simply finds the entire block and shifts it
    724 // over by one tab if it is indeed a block (just returns the line if it's not).
    725 // blockEnd is the end of the section in the input buffer, and contents is the
    726 // extracted text that was shifted over one tab. It will need to be rendered at
    727 // the end of the document.
    728 func scanFootnote(p *Markdown, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
    729 	if i == 0 || len(data) == 0 {
    730 		return
    731 	}
    732 
    733 	// skip leading whitespace on first line
    734 	for i < len(data) && data[i] == ' ' {
    735 		i++
    736 	}
    737 
    738 	blockStart = i
    739 
    740 	// find the end of the line
    741 	blockEnd = i
    742 	for i < len(data) && data[i-1] != '\n' {
    743 		i++
    744 	}
    745 
    746 	// get working buffer
    747 	var raw bytes.Buffer
    748 
    749 	// put the first line into the working buffer
    750 	raw.Write(data[blockEnd:i])
    751 	blockEnd = i
    752 
    753 	// process the following lines
    754 	containsBlankLine := false
    755 
    756 gatherLines:
    757 	for blockEnd < len(data) {
    758 		i++
    759 
    760 		// find the end of this line
    761 		for i < len(data) && data[i-1] != '\n' {
    762 			i++
    763 		}
    764 
    765 		// if it is an empty line, guess that it is part of this item
    766 		// and move on to the next line
    767 		if p.isEmpty(data[blockEnd:i]) > 0 {
    768 			containsBlankLine = true
    769 			blockEnd = i
    770 			continue
    771 		}
    772 
    773 		n := 0
    774 		if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
    775 			// this is the end of the block.
    776 			// we don't want to include this last line in the index.
    777 			break gatherLines
    778 		}
    779 
    780 		// if there were blank lines before this one, insert a new one now
    781 		if containsBlankLine {
    782 			raw.WriteByte('\n')
    783 			containsBlankLine = false
    784 		}
    785 
    786 		// get rid of that first tab, write to buffer
    787 		raw.Write(data[blockEnd+n : i])
    788 		hasBlock = true
    789 
    790 		blockEnd = i
    791 	}
    792 
    793 	if data[blockEnd-1] != '\n' {
    794 		raw.WriteByte('\n')
    795 	}
    796 
    797 	contents = raw.Bytes()
    798 
    799 	return
    800 }
    801 
    802 //
    803 //
    804 // Miscellaneous helper functions
    805 //
    806 //
    807 
    808 // Test if a character is a punctuation symbol.
    809 // Taken from a private function in regexp in the stdlib.
    810 func ispunct(c byte) bool {
    811 	for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") {
    812 		if c == r {
    813 			return true
    814 		}
    815 	}
    816 	return false
    817 }
    818 
    819 // Test if a character is a whitespace character.
    820 func isspace(c byte) bool {
    821 	return ishorizontalspace(c) || isverticalspace(c)
    822 }
    823 
    824 // Test if a character is a horizontal whitespace character.
    825 func ishorizontalspace(c byte) bool {
    826 	return c == ' ' || c == '\t'
    827 }
    828 
    829 // Test if a character is a vertical character.
    830 func isverticalspace(c byte) bool {
    831 	return c == '\n' || c == '\r' || c == '\f' || c == '\v'
    832 }
    833 
    834 // Test if a character is letter.
    835 func isletter(c byte) bool {
    836 	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
    837 }
    838 
    839 // Test if a character is a letter or a digit.
    840 // TODO: check when this is looking for ASCII alnum and when it should use unicode
    841 func isalnum(c byte) bool {
    842 	return (c >= '0' && c <= '9') || isletter(c)
    843 }
    844 
    845 // Replace tab characters with spaces, aligning to the next TAB_SIZE column.
    846 // always ends output with a newline
    847 func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
    848 	// first, check for common cases: no tabs, or only tabs at beginning of line
    849 	i, prefix := 0, 0
    850 	slowcase := false
    851 	for i = 0; i < len(line); i++ {
    852 		if line[i] == '\t' {
    853 			if prefix == i {
    854 				prefix++
    855 			} else {
    856 				slowcase = true
    857 				break
    858 			}
    859 		}
    860 	}
    861 
    862 	// no need to decode runes if all tabs are at the beginning of the line
    863 	if !slowcase {
    864 		for i = 0; i < prefix*tabSize; i++ {
    865 			out.WriteByte(' ')
    866 		}
    867 		out.Write(line[prefix:])
    868 		return
    869 	}
    870 
    871 	// the slow case: we need to count runes to figure out how
    872 	// many spaces to insert for each tab
    873 	column := 0
    874 	i = 0
    875 	for i < len(line) {
    876 		start := i
    877 		for i < len(line) && line[i] != '\t' {
    878 			_, size := utf8.DecodeRune(line[i:])
    879 			i += size
    880 			column++
    881 		}
    882 
    883 		if i > start {
    884 			out.Write(line[start:i])
    885 		}
    886 
    887 		if i >= len(line) {
    888 			break
    889 		}
    890 
    891 		for {
    892 			out.WriteByte(' ')
    893 			column++
    894 			if column%tabSize == 0 {
    895 				break
    896 			}
    897 		}
    898 
    899 		i++
    900 	}
    901 }
    902 
    903 // Find if a line counts as indented or not.
    904 // Returns number of characters the indent is (0 = not indented).
    905 func isIndented(data []byte, indentSize int) int {
    906 	if len(data) == 0 {
    907 		return 0
    908 	}
    909 	if data[0] == '\t' {
    910 		return 1
    911 	}
    912 	if len(data) < indentSize {
    913 		return 0
    914 	}
    915 	for i := 0; i < indentSize; i++ {
    916 		if data[i] != ' ' {
    917 			return 0
    918 		}
    919 	}
    920 	return indentSize
    921 }
    922 
    923 // Create a url-safe slug for fragments
    924 func slugify(in []byte) []byte {
    925 	if len(in) == 0 {
    926 		return in
    927 	}
    928 	out := make([]byte, 0, len(in))
    929 	sym := false
    930 
    931 	for _, ch := range in {
    932 		if isalnum(ch) {
    933 			sym = false
    934 			out = append(out, ch)
    935 		} else if sym {
    936 			continue
    937 		} else {
    938 			out = append(out, '-')
    939 			sym = true
    940 		}
    941 	}
    942 	var a, b int
    943 	var ch byte
    944 	for a, ch = range out {
    945 		if ch != '-' {
    946 			break
    947 		}
    948 	}
    949 	for b = len(out) - 1; b > 0; b-- {
    950 		if out[b] != '-' {
    951 			break
    952 		}
    953 	}
    954 	return out[a : b+1]
    955 }