markdown.go (26519B)
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.

package blackfriday

import (
	"bytes"
	"fmt"
	"io"
	"strings"
	"unicode/utf8"
)

//
// Markdown parsing and processing
//

// Version string of the package. Appears in the rendered document when
// CompletePage flag is on.
const Version = "2.0"

// Extensions is a bitwise or'ed collection of enabled Blackfriday's
// extensions.
type Extensions int

// These are the supported markdown parsing extensions.
// OR these values together to select multiple extensions.
//
// Note that iota is already 1 on the NoIntraEmphasis line (NoExtensions
// occupies position 0 of the block), so the first real flag has the value
// 1 << 1 and every following flag is the next power of two.
const (
	NoExtensions           Extensions = 0
	NoIntraEmphasis        Extensions = 1 << iota // Ignore emphasis markers inside words
	Tables                                        // Render tables
	FencedCode                                    // Render fenced code blocks
	Autolink                                      // Detect embedded URLs that are not explicitly marked
	Strikethrough                                 // Strikethrough text using ~~test~~
	LaxHTMLBlocks                                 // Loosen up HTML block parsing rules
	SpaceHeadings                                 // Be strict about prefix heading rules
	HardLineBreak                                 // Translate newlines into line breaks
	TabSizeEight                                  // Expand tabs to eight spaces instead of four
	Footnotes                                     // Pandoc-style footnotes
	NoEmptyLineBeforeBlock                        // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
	HeadingIDs                                    // specify heading IDs with {#id}
	Titleblock                                    // Titleblock ala pandoc
	AutoHeadingIDs                                // Create the heading ID from the text
	BackslashLineBreak                            // Translate trailing backslashes into line breaks
	DefinitionLists                               // Render definition lists
	NoLink                                        // No links/images
	ManualLineBreak                               // NOTE(review): undocumented; this flag is not referenced in the visible part of this file — confirm its meaning where it is consumed

	// CommonHTMLFlags is the set of HTML renderer flags that Run uses for its
	// default renderer.
	CommonHTMLFlags HTMLFlags = UseXHTML | Smartypants |
		SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes

	// CommonExtensions is the set of extensions that Run enables by default.
	CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
		Autolink | Strikethrough | SpaceHeadings | HeadingIDs |
		BackslashLineBreak | DefinitionLists
)

// ListType contains bitwise or'ed flags for list and list item objects.
type ListType int

// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
//
// The blank line inside the block does not consume an iota value, so the
// six flags are consecutive powers of two starting at 1.
const (
	ListTypeOrdered ListType = 1 << iota
	ListTypeDefinition
	ListTypeTerm

	ListItemContainsBlock
	ListItemBeginningOfList // TODO: figure out if this is of any use now
	ListItemEndOfList
)

// CellAlignFlags holds a type of alignment in a table cell.
type CellAlignFlags int

// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	TableAlignmentLeft CellAlignFlags = 1 << iota
	TableAlignmentRight
	// TableAlignmentCenter is numerically the combination of the left and
	// right values.
	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
)

// The size of a tab stop.
const (
	TabSizeDefault = 4 // tab width in columns when TabSizeEight is not selected
	TabSizeDouble  = 8 // tab width in columns for the TabSizeEight extension
)

// blockTags is a set of tags that are recognized as HTML block tags.
// Any of these can be included in markdown text without special escaping.
97 var blockTags = map[string]struct{}{ 98 "blockquote": {}, 99 "del": {}, 100 "div": {}, 101 "dl": {}, 102 "fieldset": {}, 103 "form": {}, 104 "h1": {}, 105 "h2": {}, 106 "h3": {}, 107 "h4": {}, 108 "h5": {}, 109 "h6": {}, 110 "iframe": {}, 111 "ins": {}, 112 "math": {}, 113 "noscript": {}, 114 "ol": {}, 115 "pre": {}, 116 "p": {}, 117 "script": {}, 118 "style": {}, 119 "table": {}, 120 "ul": {}, 121 122 // HTML5 123 "address": {}, 124 "article": {}, 125 "aside": {}, 126 "canvas": {}, 127 "figcaption": {}, 128 "figure": {}, 129 "footer": {}, 130 "header": {}, 131 "hgroup": {}, 132 "main": {}, 133 "nav": {}, 134 "output": {}, 135 "progress": {}, 136 "section": {}, 137 "video": {}, 138 } 139 140 // Renderer is the rendering interface. This is mostly of interest if you are 141 // implementing a new rendering format. 142 // 143 // Only an HTML implementation is provided in this repository, see the README 144 // for external implementations. 145 type Renderer interface { 146 // RenderNode is the main rendering method. It will be called once for 147 // every leaf node and twice for every non-leaf node (first with 148 // entering=true, then with entering=false). The method should write its 149 // rendition of the node to the supplied writer w. 150 RenderNode(w io.Writer, node *Node, entering bool) WalkStatus 151 152 // RenderHeader is a method that allows the renderer to produce some 153 // content preceding the main body of the output document. The header is 154 // understood in the broad sense here. For example, the default HTML 155 // renderer will write not only the HTML document preamble, but also the 156 // table of contents if it was requested. 157 // 158 // The method will be passed an entire document tree, in case a particular 159 // implementation needs to inspect it to produce output. 160 // 161 // The output should be written to the supplied writer w. If your 162 // implementation has no header to write, supply an empty implementation. 
163 RenderHeader(w io.Writer, ast *Node) 164 165 // RenderFooter is a symmetric counterpart of RenderHeader. 166 RenderFooter(w io.Writer, ast *Node) 167 } 168 169 // Callback functions for inline parsing. One such function is defined 170 // for each character that triggers a response when parsing inline data. 171 type inlineParser func(p *Markdown, data []byte, offset int) (int, *Node) 172 173 // Markdown is a type that holds extensions and the runtime state used by 174 // Parse, and the renderer. You can not use it directly, construct it with New. 175 type Markdown struct { 176 renderer Renderer 177 referenceOverride ReferenceOverrideFunc 178 refs map[string]*reference 179 inlineCallback [256]inlineParser 180 extensions Extensions 181 nesting int 182 maxNesting int 183 insideLink bool 184 185 // Footnotes need to be ordered as well as available to quickly check for 186 // presence. If a ref is also a footnote, it's stored both in refs and here 187 // in notes. Slice is nil if footnotes not enabled. 
188 notes []*reference 189 190 doc *Node 191 tip *Node // = doc 192 oldTip *Node 193 lastMatchedContainer *Node // = doc 194 allClosed bool 195 } 196 197 func (p *Markdown) getRef(refid string) (ref *reference, found bool) { 198 if p.referenceOverride != nil { 199 r, overridden := p.referenceOverride(refid) 200 if overridden { 201 if r == nil { 202 return nil, false 203 } 204 return &reference{ 205 link: []byte(r.Link), 206 title: []byte(r.Title), 207 noteID: 0, 208 hasBlock: false, 209 text: []byte(r.Text)}, true 210 } 211 } 212 // refs are case insensitive 213 ref, found = p.refs[strings.ToLower(refid)] 214 return ref, found 215 } 216 217 func (p *Markdown) finalize(block *Node) { 218 above := block.Parent 219 block.open = false 220 p.tip = above 221 } 222 223 func (p *Markdown) addChild(node NodeType, offset uint32) *Node { 224 return p.addExistingChild(NewNode(node), offset) 225 } 226 227 func (p *Markdown) addExistingChild(node *Node, offset uint32) *Node { 228 for !p.tip.canContain(node.Type) { 229 p.finalize(p.tip) 230 } 231 p.tip.AppendChild(node) 232 p.tip = node 233 return node 234 } 235 236 func (p *Markdown) closeUnmatchedBlocks() { 237 if !p.allClosed { 238 for p.oldTip != p.lastMatchedContainer { 239 parent := p.oldTip.Parent 240 p.finalize(p.oldTip) 241 p.oldTip = parent 242 } 243 p.allClosed = true 244 } 245 } 246 247 // 248 // 249 // Public interface 250 // 251 // 252 253 // Reference represents the details of a link. 254 // See the documentation in Options for more details on use-case. 255 type Reference struct { 256 // Link is usually the URL the reference points to. 257 Link string 258 // Title is the alternate text describing the link in more detail. 
259 Title string 260 // Text is the optional text to override the ref with if the syntax used was 261 // [refid][] 262 Text string 263 } 264 265 // ReferenceOverrideFunc is expected to be called with a reference string and 266 // return either a valid Reference type that the reference string maps to or 267 // nil. If overridden is false, the default reference logic will be executed. 268 // See the documentation in Options for more details on use-case. 269 type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool) 270 271 // New constructs a Markdown processor. You can use the same With* functions as 272 // for Run() to customize parser's behavior and the renderer. 273 func New(opts ...Option) *Markdown { 274 var p Markdown 275 for _, opt := range opts { 276 opt(&p) 277 } 278 p.refs = make(map[string]*reference) 279 p.maxNesting = 16 280 p.insideLink = false 281 docNode := NewNode(Document) 282 p.doc = docNode 283 p.tip = docNode 284 p.oldTip = docNode 285 p.lastMatchedContainer = docNode 286 p.allClosed = true 287 // register inline parsers 288 p.inlineCallback[' '] = maybeLineBreak 289 p.inlineCallback['#'] = censored 290 p.inlineCallback['*'] = emphasis 291 //p.inlineCallback['_'] = emphasis 292 if p.extensions&Strikethrough != 0 { 293 p.inlineCallback['~'] = emphasis 294 } 295 p.inlineCallback['`'] = codeSpan 296 p.inlineCallback['\n'] = lineBreak 297 if p.extensions&NoLink == 0 { 298 p.inlineCallback['['] = link 299 } 300 p.inlineCallback['<'] = leftAngle 301 p.inlineCallback['\\'] = escape 302 p.inlineCallback['&'] = entity 303 p.inlineCallback['!'] = maybeImage 304 p.inlineCallback['^'] = maybeInlineFootnote 305 if p.extensions&Autolink != 0 { 306 p.inlineCallback['h'] = maybeAutoLink 307 p.inlineCallback['m'] = maybeAutoLink 308 p.inlineCallback['f'] = maybeAutoLink 309 p.inlineCallback['H'] = maybeAutoLink 310 p.inlineCallback['M'] = maybeAutoLink 311 p.inlineCallback['F'] = maybeAutoLink 312 } 313 if p.extensions&Footnotes != 0 { 
314 p.notes = make([]*reference, 0) 315 } 316 return &p 317 } 318 319 // Option customizes the Markdown processor's default behavior. 320 type Option func(*Markdown) 321 322 // WithRenderer allows you to override the default renderer. 323 func WithRenderer(r Renderer) Option { 324 return func(p *Markdown) { 325 p.renderer = r 326 } 327 } 328 329 // WithExtensions allows you to pick some of the many extensions provided by 330 // Blackfriday. You can bitwise OR them. 331 func WithExtensions(e Extensions) Option { 332 return func(p *Markdown) { 333 p.extensions = e 334 } 335 } 336 337 // WithNoExtensions turns off all extensions and custom behavior. 338 func WithNoExtensions() Option { 339 return func(p *Markdown) { 340 p.extensions = NoExtensions 341 p.renderer = NewHTMLRenderer(HTMLRendererParameters{ 342 Flags: HTMLFlagsNone, 343 }) 344 } 345 } 346 347 // WithRefOverride sets an optional function callback that is called every 348 // time a reference is resolved. 349 // 350 // In Markdown, the link reference syntax can be made to resolve a link to 351 // a reference instead of an inline URL, in one of the following ways: 352 // 353 // * [link text][refid] 354 // * [refid][] 355 // 356 // Usually, the refid is defined at the bottom of the Markdown document. If 357 // this override function is provided, the refid is passed to the override 358 // function first, before consulting the defined refids at the bottom. If 359 // the override function indicates an override did not occur, the refids at 360 // the bottom will be used to fill in the link details. 361 func WithRefOverride(o ReferenceOverrideFunc) Option { 362 return func(p *Markdown) { 363 p.referenceOverride = o 364 } 365 } 366 367 // Run is the main entry point to Blackfriday. It parses and renders a 368 // block of markdown-encoded text. 
369 // 370 // The simplest invocation of Run takes one argument, input: 371 // output := Run(input) 372 // This will parse the input with CommonExtensions enabled and render it with 373 // the default HTMLRenderer (with CommonHTMLFlags). 374 // 375 // Variadic arguments opts can customize the default behavior. Since Markdown 376 // type does not contain exported fields, you can not use it directly. Instead, 377 // use the With* functions. For example, this will call the most basic 378 // functionality, with no extensions: 379 // output := Run(input, WithNoExtensions()) 380 // 381 // You can use any number of With* arguments, even contradicting ones. They 382 // will be applied in order of appearance and the latter will override the 383 // former: 384 // output := Run(input, WithNoExtensions(), WithExtensions(exts), 385 // WithRenderer(yourRenderer)) 386 func Run(input []byte, opts ...Option) []byte { 387 r := NewHTMLRenderer(HTMLRendererParameters{ 388 Flags: CommonHTMLFlags, 389 }) 390 optList := []Option{WithRenderer(r), WithExtensions(CommonExtensions)} 391 optList = append(optList, opts...) 392 parser := New(optList...) 393 ast := parser.Parse(input) 394 var buf bytes.Buffer 395 parser.renderer.RenderHeader(&buf, ast) 396 ast.Walk(func(node *Node, entering bool) WalkStatus { 397 return parser.renderer.RenderNode(&buf, node, entering) 398 }) 399 parser.renderer.RenderFooter(&buf, ast) 400 return buf.Bytes() 401 } 402 403 // Parse is an entry point to the parsing part of Blackfriday. It takes an 404 // input markdown document and produces a syntax tree for its contents. This 405 // tree can then be rendered with a default or custom renderer, or 406 // analyzed/transformed by the caller to whatever non-standard needs they have. 407 // The return value is the root node of the syntax tree. 
408 func (p *Markdown) Parse(input []byte) *Node { 409 p.block(input) 410 // Walk the tree and finish up some of unfinished blocks 411 for p.tip != nil { 412 p.finalize(p.tip) 413 } 414 // Walk the tree again and process inline markdown in each block 415 p.doc.Walk(func(node *Node, entering bool) WalkStatus { 416 if node.Type == Paragraph || node.Type == Heading || node.Type == TableCell { 417 p.inline(node, node.content) 418 node.content = nil 419 } 420 return GoToNext 421 }) 422 p.parseRefsToAST() 423 return p.doc 424 } 425 426 func (p *Markdown) parseRefsToAST() { 427 if p.extensions&Footnotes == 0 || len(p.notes) == 0 { 428 return 429 } 430 p.tip = p.doc 431 block := p.addBlock(List, nil) 432 block.IsFootnotesList = true 433 block.ListFlags = ListTypeOrdered 434 flags := ListItemBeginningOfList 435 // Note: this loop is intentionally explicit, not range-form. This is 436 // because the body of the loop will append nested footnotes to p.notes and 437 // we need to process those late additions. Range form would only walk over 438 // the fixed initial set. 439 for i := 0; i < len(p.notes); i++ { 440 ref := p.notes[i] 441 p.addExistingChild(ref.footnote, 0) 442 block := ref.footnote 443 block.ListFlags = flags | ListTypeOrdered 444 block.RefLink = ref.link 445 if ref.hasBlock { 446 flags |= ListItemContainsBlock 447 p.block(ref.title) 448 } else { 449 p.inline(block, ref.title) 450 } 451 flags &^= ListItemBeginningOfList | ListItemContainsBlock 452 } 453 above := block.Parent 454 finalizeList(block) 455 p.tip = above 456 block.Walk(func(node *Node, entering bool) WalkStatus { 457 if node.Type == Paragraph || node.Type == Heading { 458 p.inline(node, node.content) 459 node.content = nil 460 } 461 return GoToNext 462 }) 463 } 464 465 // 466 // Link references 467 // 468 // This section implements support for references that (usually) appear 469 // as footnotes in a document, and can be referenced anywhere in the document. 
// The basic format is:
//
//	[1]: http://www.google.com/ "Google"
//	[2]: http://www.github.com/ "Github"
//
// Anywhere in the document, the reference can be linked by referring to its
// label, i.e., 1 and 2 in this example, as in:
//
//	This library is hosted on [Github][2], a git hosting site.
//
// Actual footnotes as specified in Pandoc and supported by some other Markdown
// libraries such as php-markdown are also taken care of. They look like this:
//
//	This sentence needs a bit of further explanation.[^note]
//
//	[^note]: This is the explanation.
//
// Footnotes should be placed at the end of the document in an ordered list.
// Finally, there are inline footnotes such as:
//
//	Inline footnotes^[Also supported.] provide a quick inline explanation,
//	but are rendered at the bottom of the document.
//

// reference holds all information necessary for reference-style links or
// footnotes.
//
// Consider this markdown with reference-style links:
//
//	[link][ref]
//
//	[ref]: /url/ "tooltip title"
//
// It will be ultimately converted to this HTML:
//
//	<p><a href="/url/" title="tooltip title">link</a></p>
//
// And a reference structure will be populated as follows:
//
//	p.refs["ref"] = &reference{
//	    link: "/url/",
//	    title: "tooltip title",
//	}
//
// Alternatively, a reference can contain information about a footnote.
// Consider this markdown:
//
//	Text needing a footnote.[^a]
//
//	[^a]: This is the note
//
// A reference structure will be populated as follows:
//
//	p.refs["a"] = &reference{
//	    link: "a",
//	    title: "This is the note",
//	    noteID: <some positive int>,
//	}
//
// TODO: As you can see, it begs for splitting into two dedicated structures
// for refs and for footnotes.
531 type reference struct { 532 link []byte 533 title []byte 534 noteID int // 0 if not a footnote ref 535 hasBlock bool 536 footnote *Node // a link to the Item node within a list of footnotes 537 538 text []byte // only gets populated by refOverride feature with Reference.Text 539 } 540 541 func (r *reference) String() string { 542 return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}", 543 r.link, r.title, r.text, r.noteID, r.hasBlock) 544 } 545 546 // Check whether or not data starts with a reference link. 547 // If so, it is parsed and stored in the list of references 548 // (in the render struct). 549 // Returns the number of bytes to skip to move past it, 550 // or zero if the first line is not a reference. 551 func isReference(p *Markdown, data []byte, tabSize int) int { 552 // up to 3 optional leading spaces 553 if len(data) < 4 { 554 return 0 555 } 556 i := 0 557 for i < 3 && data[i] == ' ' { 558 i++ 559 } 560 561 noteID := 0 562 563 // id part: anything but a newline between brackets 564 if data[i] != '[' { 565 return 0 566 } 567 i++ 568 if p.extensions&Footnotes != 0 { 569 if i < len(data) && data[i] == '^' { 570 // we can set it to anything here because the proper noteIds will 571 // be assigned later during the second pass. It just has to be != 0 572 noteID = 1 573 i++ 574 } 575 } 576 idOffset := i 577 for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' { 578 i++ 579 } 580 if i >= len(data) || data[i] != ']' { 581 return 0 582 } 583 idEnd := i 584 // footnotes can have empty ID, like this: [^], but a reference can not be 585 // empty like this: []. Break early if it's not a footnote and there's no ID 586 if noteID == 0 && idOffset == idEnd { 587 return 0 588 } 589 // spacer: colon (space | tab)* newline? 
(space | tab)* 590 i++ 591 if i >= len(data) || data[i] != ':' { 592 return 0 593 } 594 i++ 595 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 596 i++ 597 } 598 if i < len(data) && (data[i] == '\n' || data[i] == '\r') { 599 i++ 600 if i < len(data) && data[i] == '\n' && data[i-1] == '\r' { 601 i++ 602 } 603 } 604 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 605 i++ 606 } 607 if i >= len(data) { 608 return 0 609 } 610 611 var ( 612 linkOffset, linkEnd int 613 titleOffset, titleEnd int 614 lineEnd int 615 raw []byte 616 hasBlock bool 617 ) 618 619 if p.extensions&Footnotes != 0 && noteID != 0 { 620 linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize) 621 lineEnd = linkEnd 622 } else { 623 linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i) 624 } 625 if lineEnd == 0 { 626 return 0 627 } 628 629 // a valid ref has been found 630 631 ref := &reference{ 632 noteID: noteID, 633 hasBlock: hasBlock, 634 } 635 636 if noteID > 0 { 637 // reusing the link field for the id since footnotes don't have links 638 ref.link = data[idOffset:idEnd] 639 // if footnote, it's not really a title, it's the contained text 640 ref.title = raw 641 } else { 642 ref.link = data[linkOffset:linkEnd] 643 ref.title = data[titleOffset:titleEnd] 644 } 645 646 // id matches are case-insensitive 647 id := string(bytes.ToLower(data[idOffset:idEnd])) 648 649 p.refs[id] = ref 650 651 return lineEnd 652 } 653 654 func scanLinkRef(p *Markdown, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) { 655 // link: whitespace-free sequence, optionally between angle brackets 656 if data[i] == '<' { 657 i++ 658 } 659 linkOffset = i 660 for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' { 661 i++ 662 } 663 linkEnd = i 664 if data[linkOffset] == '<' && data[linkEnd-1] == '>' { 665 linkOffset++ 666 linkEnd-- 667 } 668 669 // optional spacer: (space | tab)* (newline | '\'' | '"' | '(' 
) 670 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 671 i++ 672 } 673 if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' { 674 return 675 } 676 677 // compute end-of-line 678 if i >= len(data) || data[i] == '\r' || data[i] == '\n' { 679 lineEnd = i 680 } 681 if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' { 682 lineEnd++ 683 } 684 685 // optional (space|tab)* spacer after a newline 686 if lineEnd > 0 { 687 i = lineEnd + 1 688 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 689 i++ 690 } 691 } 692 693 // optional title: any non-newline sequence enclosed in '"() alone on its line 694 if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') { 695 i++ 696 titleOffset = i 697 698 // look for EOL 699 for i < len(data) && data[i] != '\n' && data[i] != '\r' { 700 i++ 701 } 702 if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' { 703 titleEnd = i + 1 704 } else { 705 titleEnd = i 706 } 707 708 // step back 709 i-- 710 for i > titleOffset && (data[i] == ' ' || data[i] == '\t') { 711 i-- 712 } 713 if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') { 714 lineEnd = titleEnd 715 titleEnd = i 716 } 717 } 718 719 return 720 } 721 722 // The first bit of this logic is the same as Parser.listItem, but the rest 723 // is much simpler. This function simply finds the entire block and shifts it 724 // over by one tab if it is indeed a block (just returns the line if it's not). 725 // blockEnd is the end of the section in the input buffer, and contents is the 726 // extracted text that was shifted over one tab. It will need to be rendered at 727 // the end of the document. 
//
// i is the offset in data at which the footnote text starts and indentSize is
// the number of leading spaces that isIndented accepts as one level of
// indentation. blockStart is the offset of the first non-space byte of the
// first line, and hasBlock is set once at least one indented continuation
// line has been appended to contents. All results stay at their zero values
// when i is 0 or data is empty.
func scanFootnote(p *Markdown, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
	// i == 0 must be rejected: the line scans below look at data[i-1].
	if i == 0 || len(data) == 0 {
		return
	}

	// skip leading whitespace on first line
	for i < len(data) && data[i] == ' ' {
		i++
	}

	blockStart = i

	// find the end of the line
	// (the loop stops with i just past the '\n', or at len(data))
	blockEnd = i
	for i < len(data) && data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[blockEnd:i])
	blockEnd = i

	// process the following lines
	containsBlankLine := false

gatherLines:
	for blockEnd < len(data) {
		// step past the start of the line so that data[i-1] no longer refers
		// to the previous line's '\n'
		i++

		// find the end of this line
		for i < len(data) && data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[blockEnd:i]) > 0 {
			containsBlankLine = true
			blockEnd = i
			continue
		}

		// n is the width of the indent to strip (0 = line is not indented)
		n := 0
		if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
			// this is the end of the block.
			// we don't want to include this last line in the index.
			break gatherLines
		}

		// if there were blank lines before this one, insert a new one now
		// (a run of blank lines collapses into a single '\n')
		if containsBlankLine {
			raw.WriteByte('\n')
			containsBlankLine = false
		}

		// get rid of that first tab, write to buffer
		raw.Write(data[blockEnd+n : i])
		hasBlock = true

		blockEnd = i
	}

	// make sure the extracted text ends with a newline when the consumed
	// input did not
	if data[blockEnd-1] != '\n' {
		raw.WriteByte('\n')
	}

	contents = raw.Bytes()

	return
}

//
//
// Miscellaneous helper functions
//
//

// Test if a character is a punctuation symbol.
// Taken from a private function in regexp in the stdlib.
func ispunct(c byte) bool {
	switch c {
	case '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/',
		':', ';', '<', '=', '>', '?', '@',
		'[', '\\', ']', '^', '_', '`',
		'{', '|', '}', '~':
		return true
	}
	return false
}

// Test if a character is whitespace of either kind, horizontal or vertical.
func isspace(c byte) bool {
	if ishorizontalspace(c) {
		return true
	}
	return isverticalspace(c)
}

// Test if a character is horizontal whitespace: a space or a tab.
func ishorizontalspace(c byte) bool {
	switch c {
	case ' ', '\t':
		return true
	}
	return false
}

// Test if a character is vertical whitespace: newline, carriage return,
// form feed or vertical tab.
func isverticalspace(c byte) bool {
	switch c {
	case '\n', '\r', '\f', '\v':
		return true
	}
	return false
}

// Test if a character is an ASCII letter.
func isletter(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	}
	return false
}

// Test if a character is an ASCII letter or an ASCII digit.
// TODO: check when this is looking for ASCII alnum and when it should use unicode
func isalnum(c byte) bool {
	if '0' <= c && c <= '9' {
		return true
	}
	return isletter(c)
}

// Replace tab characters with spaces, aligning to the next tabSize column.
// Everything else in line is copied to out unchanged; nothing is appended
// after the last byte of line.
func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
	// Fast path: the only tabs are a run at the very beginning of the line
	// (possibly an empty run). Each of them is then exactly tabSize wide and
	// no rune decoding is required.
	leading := 0
	for leading < len(line) && line[leading] == '\t' {
		leading++
	}
	if bytes.IndexByte(line[leading:], '\t') < 0 {
		for pad := leading * tabSize; pad > 0; pad-- {
			out.WriteByte(' ')
		}
		out.Write(line[leading:])
		return
	}

	// Slow path: a tab follows other text, so the current column has to be
	// tracked, counting runes rather than bytes, to know how far each tab
	// reaches.
	col, pos := 0, 0
	for pos < len(line) {
		// copy the run of non-tab runes verbatim
		runStart := pos
		for pos < len(line) && line[pos] != '\t' {
			_, width := utf8.DecodeRune(line[pos:])
			pos += width
			col++
		}
		if pos > runStart {
			out.Write(line[runStart:pos])
		}
		if pos >= len(line) {
			break
		}

		// a tab is at least one space wide, then pads up to the next stop
		out.WriteByte(' ')
		col++
		for col%tabSize != 0 {
			out.WriteByte(' ')
			col++
		}

		pos++ // step over the tab itself
	}
}

// Find if a line counts as indented or not.
// Returns number of characters the indent is (0 = not indented).
905 func isIndented(data []byte, indentSize int) int { 906 if len(data) == 0 { 907 return 0 908 } 909 if data[0] == '\t' { 910 return 1 911 } 912 if len(data) < indentSize { 913 return 0 914 } 915 for i := 0; i < indentSize; i++ { 916 if data[i] != ' ' { 917 return 0 918 } 919 } 920 return indentSize 921 } 922 923 // Create a url-safe slug for fragments 924 func slugify(in []byte) []byte { 925 if len(in) == 0 { 926 return in 927 } 928 out := make([]byte, 0, len(in)) 929 sym := false 930 931 for _, ch := range in { 932 if isalnum(ch) { 933 sym = false 934 out = append(out, ch) 935 } else if sym { 936 continue 937 } else { 938 out = append(out, '-') 939 sym = true 940 } 941 } 942 var a, b int 943 var ch byte 944 for a, ch = range out { 945 if ch != '-' { 946 break 947 } 948 } 949 for b = len(out) - 1; b > 0; b-- { 950 if out[b] != '-' { 951 break 952 } 953 } 954 return out[a : b+1] 955 }