inline.go (26889B)
1 // 2 // Blackfriday Markdown Processor 3 // Available at http://github.com/russross/blackfriday 4 // 5 // Copyright © 2011 Russ Ross <russ@russross.com>. 6 // Distributed under the Simplified BSD License. 7 // See README.md for details. 8 // 9 10 // 11 // Functions to parse inline elements. 12 // 13 14 package blackfriday 15 16 import ( 17 "bytes" 18 "regexp" 19 "strconv" 20 ) 21 22 var ( 23 urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+` 24 anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`) 25 26 // https://www.w3.org/TR/html5/syntax.html#character-references 27 // highest unicode code point in 17 planes (2^20): 1,114,112d = 28 // 7 dec digits or 6 hex digits 29 // named entity references can be 2-31 characters with stuff like < 30 // at one end and ∳ at the other. There 31 // are also sometimes numbers at the end, although this isn't inherent 32 // in the specification; there are never numbers anywhere else in 33 // current character references, though; see ¾ and ▒, etc. 34 // https://www.w3.org/TR/html5/syntax.html#named-character-references 35 // 36 // entity := "&" (named group | number ref) ";" 37 // named group := [a-zA-Z]{2,31}[0-9]{0,2} 38 // number ref := "#" (dec ref | hex ref) 39 // dec ref := [0-9]{1,7} 40 // hex ref := ("x" | "X") [0-9a-fA-F]{1,6} 41 htmlEntityRe = regexp.MustCompile(`&([a-zA-Z]{2,31}[0-9]{0,2}|#([0-9]{1,7}|[xX][0-9a-fA-F]{1,6}));`) 42 ) 43 44 // Functions to parse text within a block 45 // Each function returns the number of chars taken care of 46 // data is the complete block being rendered 47 // offset is the number of valid chars before the current cursor 48 49 func (p *Markdown) inline(currBlock *Node, data []byte) { 50 // handlers might call us recursively: enforce a maximum depth 51 if p.nesting >= p.maxNesting || len(data) == 0 { 52 return 53 } 54 p.nesting++ 55 beg, end := 0, 0 56 for end < len(data) { 57 handler := p.inlineCallback[data[end]] 58 if handler != nil { 59 if consumed, node := handler(p, data, end); consumed == 0 { 60 // No action from the callback. 61 end++ 62 } else { 63 // Copy inactive chars into the output. 64 currBlock.AppendChild(text(data[beg:end])) 65 if node != nil { 66 currBlock.AppendChild(node) 67 } 68 // Skip past whatever the callback used. 69 beg = end + consumed 70 end = beg 71 } 72 } else { 73 end++ 74 } 75 } 76 if beg < len(data) { 77 if data[end-1] == '\n' { 78 end-- 79 } 80 currBlock.AppendChild(text(data[beg:end])) 81 } 82 p.nesting-- 83 } 84 85 func censored(p *Markdown, data []byte, offset int) (int, *Node) { 86 data = data[offset:] 87 c := data[0] 88 89 if len(data) > 2 && data[1] != c { 90 ret, node := helperCensored(p, data[1:], c) 91 if ret == 0 { 92 return 0, nil 93 } 94 95 return ret + 1, node 96 } 97 98 return 0, nil 99 } 100 101 // single and double emphasis parsing 102 func emphasis(p *Markdown, data []byte, offset int) (int, *Node) { 103 data = data[offset:] 104 c := data[0] 105 106 if len(data) > 2 && data[1] != c { 107 // whitespace cannot follow an opening emphasis; 108 // strikethrough only takes two characters '~~' 109 if c == '~' || isspace(data[1]) { 110 return 0, nil 111 } 112 ret, node := helperEmphasis(p, data[1:], c) 113 if ret == 0 { 114 return 0, nil 115 } 116 117 return ret + 1, node 118 } 119 120 if len(data) > 3 && data[1] == c && data[2] != c { 121 if isspace(data[2]) { 122 return 0, nil 123 } 124 ret, node := helperDoubleEmphasis(p, data[2:], c) 125 if ret == 0 { 126 return 0, nil 127 } 128 129 return ret + 2, node 130 } 131 132 if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c { 133 if c == '~' || isspace(data[3]) { 134 return 0, nil 135 } 136 ret, node := helperTripleEmphasis(p, data, 3, c) 137 if ret == 0 { 138 return 0, nil 139 } 140 141 return ret + 3, node 142 } 143 144 return 0, nil 145 } 146 147 func codeSpan(p *Markdown, data []byte, offset int) (int, *Node) { 148 data = data[offset:] 149 150 nb := 0 151 152 // count the number of backticks in the delimiter 153 for nb < len(data) && data[nb] == '`' { 154 nb++ 155 } 156 157 // find the next delimiter 158 i, end := 0, 0 159 for end = nb; end < len(data) && i < nb; end++ { 160 if data[end] == '`' { 161 i++ 162 } else { 163 i = 0 164 } 165 } 166 167 // no matching delimiter? 168 if i < nb && end >= len(data) { 169 return 0, nil 170 } 171 172 // trim outside whitespace 173 fBegin := nb 174 for fBegin < end && data[fBegin] == ' ' { 175 fBegin++ 176 } 177 178 fEnd := end - nb 179 for fEnd > fBegin && data[fEnd-1] == ' ' { 180 fEnd-- 181 } 182 183 // render the code span 184 if fBegin != fEnd { 185 code := NewNode(Code) 186 code.Literal = data[fBegin:fEnd] 187 return end, code 188 } 189 190 return end, nil 191 } 192 193 // newline preceded by two spaces becomes <br> 194 func maybeLineBreak(p *Markdown, data []byte, offset int) (int, *Node) { 195 origOffset := offset 196 for offset < len(data) && data[offset] == ' ' { 197 offset++ 198 } 199 200 if offset < len(data) && data[offset] == '\n' { 201 if offset-origOffset >= 2 { 202 return offset - origOffset + 1, NewNode(Hardbreak) 203 } 204 return offset - origOffset, nil 205 } 206 return 0, nil 207 } 208 209 // newline without two spaces works when HardLineBreak is enabled 210 func lineBreak(p *Markdown, data []byte, offset int) (int, *Node) { 211 if p.extensions&HardLineBreak != 0 { 212 return 1, NewNode(Hardbreak) 213 } 214 return 0, nil 215 } 216 217 type linkType int 218 219 const ( 220 linkNormal linkType = iota 221 linkImg 222 linkDeferredFootnote 223 linkInlineFootnote 224 ) 225 226 func isReferenceStyleLink(data []byte, pos int, t linkType) bool { 227 if t == linkDeferredFootnote { 228 return false 229 } 230 return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^' 231 } 232 233 func maybeImage(p *Markdown, data []byte, offset int) (int, *Node) { 234 if offset < len(data)-1 && data[offset+1] == '[' { 235 return link(p, data, offset) 236 } 237 return 0, nil 238 } 239 240 func maybeInlineFootnote(p *Markdown, data []byte, offset int) (int, *Node) { 241 if offset < len(data)-1 && data[offset+1] == '[' { 242 return link(p, data, offset) 243 } 244 return 0, nil 245 } 246 247 // '[': parse a link or an image or a footnote 248 func link(p *Markdown, data []byte, offset int) (int, *Node) { 249 // no links allowed inside regular links, footnote, and deferred footnotes 250 if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') { 251 return 0, nil 252 } 253 254 var t linkType 255 switch { 256 // special case: ![^text] == deferred footnote (that follows something with 257 // an exclamation point) 258 case p.extensions&Footnotes != 0 && len(data)-1 > offset && data[offset+1] == '^': 259 t = linkDeferredFootnote 260 // ![alt] == image 261 case offset >= 0 && data[offset] == '!': 262 t = linkImg 263 offset++ 264 // ^[text] == inline footnote 265 // [^refId] == deferred footnote 266 case p.extensions&Footnotes != 0: 267 if offset >= 0 && data[offset] == '^' { 268 t = linkInlineFootnote 269 offset++ 270 } else if len(data)-1 > offset && data[offset+1] == '^' { 271 t = linkDeferredFootnote 272 } 273 // [text] == regular link 274 default: 275 t = linkNormal 276 } 277 278 data = data[offset:] 279 280 var ( 281 i = 1 282 noteID int 283 title, link, altContent []byte 284 textHasNl = false 285 ) 286 287 if t == linkDeferredFootnote { 288 i++ 289 } 290 291 // look for the matching closing bracket 292 for level := 1; level > 0 && i < len(data); i++ { 293 switch { 294 case data[i] == '\n': 295 textHasNl = true 296 297 case isBackslashEscaped(data, i): 298 continue 299 300 case data[i] == '[': 301 level++ 302 303 case data[i] == ']': 304 level-- 305 if level <= 0 { 306 i-- // compensate for extra i++ in for loop 307 } 308 } 309 } 310 311 if i >= len(data) { 312 return 0, nil 313 } 314 315 txtE := i 316 i++ 317 var footnoteNode *Node 318 319 // skip any amount of whitespace or newline 320 // (this is much more lax than original markdown syntax) 321 for i < len(data) && isspace(data[i]) { 322 i++ 323 } 324 325 // inline style link 326 switch { 327 case i < len(data) && data[i] == '(': 328 // skip initial whitespace 329 i++ 330 331 for i < len(data) && isspace(data[i]) { 332 i++ 333 } 334 335 linkB := i 336 337 // look for link end: ' " ) 338 findlinkend: 339 for i < len(data) { 340 switch { 341 case data[i] == '\\': 342 i += 2 343 344 case data[i] == ')' || data[i] == '\'' || data[i] == '"': 345 break findlinkend 346 347 default: 348 i++ 349 } 350 } 351 352 if i >= len(data) { 353 return 0, nil 354 } 355 linkE := i 356 357 // look for title end if present 358 titleB, titleE := 0, 0 359 if data[i] == '\'' || data[i] == '"' { 360 i++ 361 titleB = i 362 363 findtitleend: 364 for i < len(data) { 365 switch { 366 case data[i] == '\\': 367 i += 2 368 369 case data[i] == ')': 370 break findtitleend 371 372 default: 373 i++ 374 } 375 } 376 377 if i >= len(data) { 378 return 0, nil 379 } 380 381 // skip whitespace after title 382 titleE = i - 1 383 for titleE > titleB && isspace(data[titleE]) { 384 titleE-- 385 } 386 387 // check for closing quote presence 388 if data[titleE] != '\'' && data[titleE] != '"' { 389 titleB, titleE = 0, 0 390 linkE = i 391 } 392 } 393 394 // remove whitespace at the end of the link 395 for linkE > linkB && isspace(data[linkE-1]) { 396 linkE-- 397 } 398 399 // remove optional angle brackets around the link 400 if data[linkB] == '<' { 401 linkB++ 402 } 403 if data[linkE-1] == '>' { 404 linkE-- 405 } 406 407 // build escaped link and title 408 if linkE > linkB { 409 link = data[linkB:linkE] 410 } 411 412 if titleE > titleB { 413 title = data[titleB:titleE] 414 } 415 416 i++ 417 418 // reference style link 419 case isReferenceStyleLink(data, i, t): 420 var id []byte 421 altContentConsidered := false 422 423 // look for the id 424 i++ 425 linkB := i 426 for i < len(data) && data[i] != ']' { 427 i++ 428 } 429 if i >= len(data) { 430 return 0, nil 431 } 432 linkE := i 433 434 // find the reference 435 if linkB == linkE { 436 if textHasNl { 437 var b bytes.Buffer 438 439 for j := 1; j < txtE; j++ { 440 switch { 441 case data[j] != '\n': 442 b.WriteByte(data[j]) 443 case data[j-1] != ' ': 444 b.WriteByte(' ') 445 } 446 } 447 448 id = b.Bytes() 449 } else { 450 id = data[1:txtE] 451 altContentConsidered = true 452 } 453 } else { 454 id = data[linkB:linkE] 455 } 456 457 // find the reference with matching id 458 lr, ok := p.getRef(string(id)) 459 if !ok { 460 return 0, nil 461 } 462 463 // keep link and title from reference 464 link = lr.link 465 title = lr.title 466 if altContentConsidered { 467 altContent = lr.text 468 } 469 i++ 470 471 // shortcut reference style link or reference or inline footnote 472 default: 473 var id []byte 474 475 // craft the id 476 if textHasNl { 477 var b bytes.Buffer 478 479 for j := 1; j < txtE; j++ { 480 switch { 481 case data[j] != '\n': 482 b.WriteByte(data[j]) 483 case data[j-1] != ' ': 484 b.WriteByte(' ') 485 } 486 } 487 488 id = b.Bytes() 489 } else { 490 if t == linkDeferredFootnote { 491 id = data[2:txtE] // get rid of the ^ 492 } else { 493 id = data[1:txtE] 494 } 495 } 496 497 footnoteNode = NewNode(Item) 498 if t == linkInlineFootnote { 499 // create a new reference 500 noteID = len(p.notes) + 1 501 502 var fragment []byte 503 if len(id) > 0 { 504 if len(id) < 16 { 505 fragment = make([]byte, len(id)) 506 } else { 507 fragment = make([]byte, 16) 508 } 509 copy(fragment, slugify(id)) 510 } else { 511 fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteID))...) 512 } 513 514 ref := &reference{ 515 noteID: noteID, 516 hasBlock: false, 517 link: fragment, 518 title: id, 519 footnote: footnoteNode, 520 } 521 522 p.notes = append(p.notes, ref) 523 524 link = ref.link 525 title = ref.title 526 } else { 527 // find the reference with matching id 528 lr, ok := p.getRef(string(id)) 529 if !ok { 530 return 0, nil 531 } 532 533 if t == linkDeferredFootnote { 534 lr.noteID = len(p.notes) + 1 535 lr.footnote = footnoteNode 536 p.notes = append(p.notes, lr) 537 } 538 539 // keep link and title from reference 540 link = lr.link 541 // if inline footnote, title == footnote contents 542 title = lr.title 543 noteID = lr.noteID 544 } 545 546 // rewind the whitespace 547 i = txtE + 1 548 } 549 550 var uLink []byte 551 if t == linkNormal || t == linkImg { 552 if len(link) > 0 { 553 var uLinkBuf bytes.Buffer 554 unescapeText(&uLinkBuf, link) 555 uLink = uLinkBuf.Bytes() 556 } 557 558 // links need something to click on and somewhere to go 559 if len(uLink) == 0 || (t == linkNormal && txtE <= 1) { 560 return 0, nil 561 } 562 } 563 564 // call the relevant rendering function 565 var linkNode *Node 566 switch t { 567 case linkNormal: 568 linkNode = NewNode(Link) 569 linkNode.Destination = normalizeURI(uLink) 570 linkNode.Title = title 571 if len(altContent) > 0 { 572 linkNode.AppendChild(text(altContent)) 573 } else { 574 // links cannot contain other links, so turn off link parsing 575 // temporarily and recurse 576 insideLink := p.insideLink 577 p.insideLink = true 578 p.inline(linkNode, data[1:txtE]) 579 p.insideLink = insideLink 580 } 581 582 case linkImg: 583 linkNode = NewNode(Image) 584 linkNode.Destination = uLink 585 linkNode.Title = title 586 linkNode.AppendChild(text(data[1:txtE])) 587 i++ 588 589 case linkInlineFootnote, linkDeferredFootnote: 590 linkNode = NewNode(Link) 591 linkNode.Destination = link 592 linkNode.Title = title 593 linkNode.NoteID = noteID 594 linkNode.Footnote = footnoteNode 595 if t == linkInlineFootnote { 596 i++ 597 } 598 599 default: 600 return 0, nil 601 } 602 603 return i, linkNode 604 } 605 606 func (p *Markdown) inlineHTMLComment(data []byte) int { 607 if len(data) < 5 { 608 return 0 609 } 610 if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' { 611 return 0 612 } 613 i := 5 614 // scan for an end-of-comment marker, across lines if necessary 615 for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') { 616 i++ 617 } 618 // no end-of-comment marker 619 if i >= len(data) { 620 return 0 621 } 622 return i + 1 623 } 624 625 func stripMailto(link []byte) []byte { 626 if bytes.HasPrefix(link, []byte("mailto://")) { 627 return link[9:] 628 } else if bytes.HasPrefix(link, []byte("mailto:")) { 629 return link[7:] 630 } else { 631 return link 632 } 633 } 634 635 // autolinkType specifies a kind of autolink that gets detected. 636 type autolinkType int 637 638 // These are the possible flag values for the autolink renderer. 639 const ( 640 notAutolink autolinkType = iota 641 normalAutolink 642 emailAutolink 643 ) 644 645 // '<' when tags or autolinks are allowed 646 func leftAngle(p *Markdown, data []byte, offset int) (int, *Node) { 647 data = data[offset:] 648 altype, end := tagLength(data) 649 if size := p.inlineHTMLComment(data); size > 0 { 650 end = size 651 } 652 if end > 2 { 653 if altype != notAutolink { 654 var uLink bytes.Buffer 655 unescapeText(&uLink, data[1:end+1-2]) 656 if uLink.Len() > 0 { 657 link := uLink.Bytes() 658 node := NewNode(Link) 659 node.Destination = link 660 if altype == emailAutolink { 661 node.Destination = append([]byte("mailto:"), link...) 662 } 663 node.AppendChild(text(stripMailto(link))) 664 return end, node 665 } 666 } else { 667 htmlTag := NewNode(HTMLSpan) 668 htmlTag.Literal = data[:end] 669 return end, htmlTag 670 } 671 } 672 673 return end, nil 674 } 675 676 // '\\' backslash escape 677 var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~") 678 679 func escape(p *Markdown, data []byte, offset int) (int, *Node) { 680 data = data[offset:] 681 682 if len(data) > 1 { 683 if p.extensions&ManualLineBreak != 0 && data[1] == 'n' { 684 return 2, NewNode(Hardbreak) 685 } 686 if p.extensions&BackslashLineBreak != 0 && data[1] == '\n' { 687 return 2, NewNode(Hardbreak) 688 } 689 if bytes.IndexByte(escapeChars, data[1]) < 0 { 690 return 0, nil 691 } 692 693 return 2, text(data[1:2]) 694 } 695 696 return 2, nil 697 } 698 699 func unescapeText(ob *bytes.Buffer, src []byte) { 700 i := 0 701 for i < len(src) { 702 org := i 703 for i < len(src) && src[i] != '\\' { 704 i++ 705 } 706 707 if i > org { 708 ob.Write(src[org:i]) 709 } 710 711 if i+1 >= len(src) { 712 break 713 } 714 715 ob.WriteByte(src[i+1]) 716 i += 2 717 } 718 } 719 720 // '&' escaped when it doesn't belong to an entity 721 // valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; 722 func entity(p *Markdown, data []byte, offset int) (int, *Node) { 723 data = data[offset:] 724 725 end := 1 726 727 if end < len(data) && data[end] == '#' { 728 end++ 729 } 730 731 for end < len(data) && isalnum(data[end]) { 732 end++ 733 } 734 735 if end < len(data) && data[end] == ';' { 736 end++ // real entity 737 } else { 738 return 0, nil // lone '&' 739 } 740 741 ent := data[:end] 742 // undo & escaping or it will be converted to &amp; by another 743 // escaper in the renderer 744 if bytes.Equal(ent, []byte("&")) { 745 ent = []byte{'&'} 746 } 747 748 return end, text(ent) 749 } 750 751 func linkEndsWithEntity(data []byte, linkEnd int) bool { 752 entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1) 753 return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd 754 } 755 756 // hasPrefixCaseInsensitive is a custom implementation of 757 // strings.HasPrefix(strings.ToLower(s), prefix) 758 // we rolled our own because ToLower pulls in a huge machinery of lowercasing 759 // anything from Unicode and that's very slow. Since this func will only be 760 // used on ASCII protocol prefixes, we can take shortcuts. 761 func hasPrefixCaseInsensitive(s, prefix []byte) bool { 762 if len(s) < len(prefix) { 763 return false 764 } 765 delta := byte('a' - 'A') 766 for i, b := range prefix { 767 if b != s[i] && b != s[i]+delta { 768 return false 769 } 770 } 771 return true 772 } 773 774 var protocolPrefixes = [][]byte{ 775 []byte("http://"), 776 []byte("https://"), 777 []byte("ftp://"), 778 []byte("file://"), 779 []byte("mailto:"), 780 } 781 782 const shortestPrefix = 6 // len("ftp://"), the shortest of the above 783 784 func maybeAutoLink(p *Markdown, data []byte, offset int) (int, *Node) { 785 // quick check to rule out most false hits 786 if p.insideLink || len(data) < offset+shortestPrefix { 787 return 0, nil 788 } 789 for _, prefix := range protocolPrefixes { 790 endOfHead := offset + 8 // 8 is the len() of the longest prefix 791 if endOfHead > len(data) { 792 endOfHead = len(data) 793 } 794 if hasPrefixCaseInsensitive(data[offset:endOfHead], prefix) { 795 return autoLink(p, data, offset) 796 } 797 } 798 return 0, nil 799 } 800 801 func autoLink(p *Markdown, data []byte, offset int) (int, *Node) { 802 // Now a more expensive check to see if we're not inside an anchor element 803 anchorStart := offset 804 offsetFromAnchor := 0 805 for anchorStart > 0 && data[anchorStart] != '<' { 806 anchorStart-- 807 offsetFromAnchor++ 808 } 809 810 anchorStr := anchorRe.Find(data[anchorStart:]) 811 if anchorStr != nil { 812 anchorClose := NewNode(HTMLSpan) 813 anchorClose.Literal = anchorStr[offsetFromAnchor:] 814 return len(anchorStr) - offsetFromAnchor, anchorClose 815 } 816 817 // scan backward for a word boundary 818 rewind := 0 819 for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) { 820 rewind++ 821 } 822 if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters 823 return 0, nil 824 } 825 826 origData := data 827 data = data[offset-rewind:] 828 829 if !isSafeLink(data) { 830 return 0, nil 831 } 832 833 linkEnd := 0 834 for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) { 835 linkEnd++ 836 } 837 838 // Skip punctuation at the end of the link 839 if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' { 840 linkEnd-- 841 } 842 843 // But don't skip semicolon if it's a part of escaped entity: 844 if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) { 845 linkEnd-- 846 } 847 848 // See if the link finishes with a punctuation sign that can be closed. 849 var copen byte 850 switch data[linkEnd-1] { 851 case '"': 852 copen = '"' 853 case '\'': 854 copen = '\'' 855 case ')': 856 copen = '(' 857 case ']': 858 copen = '[' 859 case '}': 860 copen = '{' 861 default: 862 copen = 0 863 } 864 865 if copen != 0 { 866 bufEnd := offset - rewind + linkEnd - 2 867 868 openDelim := 1 869 870 /* Try to close the final punctuation sign in this same line; 871 * if we managed to close it outside of the URL, that means that it's 872 * not part of the URL. If it closes inside the URL, that means it 873 * is part of the URL. 874 * 875 * Examples: 876 * 877 * foo http://www.pokemon.com/Pikachu_(Electric) bar 878 * => http://www.pokemon.com/Pikachu_(Electric) 879 * 880 * foo (http://www.pokemon.com/Pikachu_(Electric)) bar 881 * => http://www.pokemon.com/Pikachu_(Electric) 882 * 883 * foo http://www.pokemon.com/Pikachu_(Electric)) bar 884 * => http://www.pokemon.com/Pikachu_(Electric)) 885 * 886 * (foo http://www.pokemon.com/Pikachu_(Electric)) bar 887 * => foo http://www.pokemon.com/Pikachu_(Electric) 888 */ 889 890 for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 { 891 if origData[bufEnd] == data[linkEnd-1] { 892 openDelim++ 893 } 894 895 if origData[bufEnd] == copen { 896 openDelim-- 897 } 898 899 bufEnd-- 900 } 901 902 if openDelim == 0 { 903 linkEnd-- 904 } 905 } 906 907 var uLink bytes.Buffer 908 unescapeText(&uLink, data[:linkEnd]) 909 910 if uLink.Len() > 0 { 911 node := NewNode(Link) 912 node.Destination = uLink.Bytes() 913 node.AppendChild(text(uLink.Bytes())) 914 return linkEnd, node 915 } 916 917 return linkEnd, nil 918 } 919 920 func isEndOfLink(char byte) bool { 921 return isspace(char) || char == '<' 922 } 923 924 var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")} 925 var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")} 926 927 func isSafeLink(link []byte) bool { 928 for _, path := range validPaths { 929 if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) { 930 if len(link) == len(path) { 931 return true 932 } else if isalnum(link[len(path)]) { 933 return true 934 } 935 } 936 } 937 938 for _, prefix := range validUris { 939 // TODO: handle unicode here 940 // case-insensitive prefix test 941 if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) { 942 return true 943 } 944 } 945 946 return false 947 } 948 949 // return the length of the given tag, or 0 is it's not valid 950 func tagLength(data []byte) (autolink autolinkType, end int) { 951 var i, j int 952 953 // a valid tag can't be shorter than 3 chars 954 if len(data) < 3 { 955 return notAutolink, 0 956 } 957 958 // begins with a '<' optionally followed by '/', followed by letter or number 959 if data[0] != '<' { 960 return notAutolink, 0 961 } 962 if data[1] == '/' { 963 i = 2 964 } else { 965 i = 1 966 } 967 968 if !isalnum(data[i]) { 969 return notAutolink, 0 970 } 971 972 // scheme test 973 autolink = notAutolink 974 975 // try to find the beginning of an URI 976 for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') { 977 i++ 978 } 979 980 if i > 1 && i < len(data) && data[i] == '@' { 981 if j = isMailtoAutoLink(data[i:]); j != 0 { 982 return emailAutolink, i + j 983 } 984 } 985 986 if i > 2 && i < len(data) && data[i] == ':' { 987 autolink = normalAutolink 988 i++ 989 } 990 991 // complete autolink test: no whitespace or ' or " 992 switch { 993 case i >= len(data): 994 autolink = notAutolink 995 case autolink != notAutolink: 996 j = i 997 998 for i < len(data) { 999 if data[i] == '\\' { 1000 i += 2 1001 } else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) { 1002 break 1003 } else { 1004 i++ 1005 } 1006 1007 } 1008 1009 if i >= len(data) { 1010 return autolink, 0 1011 } 1012 if i > j && data[i] == '>' { 1013 return autolink, i + 1 1014 } 1015 1016 // one of the forbidden chars has been found 1017 autolink = notAutolink 1018 } 1019 i += bytes.IndexByte(data[i:], '>') 1020 if i < 0 { 1021 return autolink, 0 1022 } 1023 return autolink, i + 1 1024 } 1025 1026 // look for the address part of a mail autolink and '>' 1027 // this is less strict than the original markdown e-mail address matching 1028 func isMailtoAutoLink(data []byte) int { 1029 nb := 0 1030 1031 // address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' 1032 for i := 0; i < len(data); i++ { 1033 if isalnum(data[i]) { 1034 continue 1035 } 1036 1037 switch data[i] { 1038 case '@': 1039 nb++ 1040 1041 case '-', '.', '_': 1042 break 1043 1044 case '>': 1045 if nb == 1 { 1046 return i + 1 1047 } 1048 return 0 1049 default: 1050 return 0 1051 } 1052 } 1053 1054 return 0 1055 } 1056 1057 // look for the next emph char, skipping other constructs 1058 func helperFindEmphChar(data []byte, c byte) int { 1059 i := 0 1060 1061 for i < len(data) { 1062 for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' { 1063 i++ 1064 } 1065 if i >= len(data) { 1066 return 0 1067 } 1068 // do not count escaped chars 1069 if i != 0 && data[i-1] == '\\' { 1070 i++ 1071 continue 1072 } 1073 if data[i] == c { 1074 return i 1075 } 1076 1077 if data[i] == '`' { 1078 // skip a code span 1079 tmpI := 0 1080 i++ 1081 for i < len(data) && data[i] != '`' { 1082 if tmpI == 0 && data[i] == c { 1083 tmpI = i 1084 } 1085 i++ 1086 } 1087 if i >= len(data) { 1088 return tmpI 1089 } 1090 i++ 1091 } else if data[i] == '[' { 1092 // skip a link 1093 tmpI := 0 1094 i++ 1095 for i < len(data) && data[i] != ']' { 1096 if tmpI == 0 && data[i] == c { 1097 tmpI = i 1098 } 1099 i++ 1100 } 1101 i++ 1102 for i < len(data) && (data[i] == ' ' || data[i] == '\n') { 1103 i++ 1104 } 1105 if i >= len(data) { 1106 return tmpI 1107 } 1108 if data[i] != '[' && data[i] != '(' { // not a link 1109 if tmpI > 0 { 1110 return tmpI 1111 } 1112 continue 1113 } 1114 cc := data[i] 1115 i++ 1116 for i < len(data) && data[i] != cc { 1117 if tmpI == 0 && data[i] == c { 1118 return i 1119 } 1120 i++ 1121 } 1122 if i >= len(data) { 1123 return tmpI 1124 } 1125 i++ 1126 } 1127 } 1128 return 0 1129 } 1130 1131 func helperCensored(p *Markdown, data []byte, c byte) (int, *Node) { 1132 i := 0 1133 1134 // skip one symbol if coming from emph3 1135 if len(data) > 1 && data[0] == c && data[1] == c { 1136 i = 1 1137 } 1138 1139 for i < len(data) { 1140 length := helperFindEmphChar(data[i:], c) 1141 if length == 0 { 1142 return 0, nil 1143 } 1144 i += length 1145 if i >= len(data) { 1146 return 0, nil 1147 } 1148 1149 if i+1 < len(data) && data[i+1] == c { 1150 i++ 1151 continue 1152 } 1153 1154 if data[i] == c && !isspace(data[i-1]) { 1155 1156 if p.extensions&NoIntraEmphasis != 0 { 1157 if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) { 1158 continue 1159 } 1160 } 1161 1162 emph := NewNode(Censored) 1163 p.inline(emph, data[:i]) 1164 return i + 1, emph 1165 } 1166 } 1167 1168 return 0, nil 1169 } 1170 1171 func helperEmphasis(p *Markdown, data []byte, c byte) (int, *Node) { 1172 i := 0 1173 1174 // skip one symbol if coming from emph3 1175 if len(data) > 1 && data[0] == c && data[1] == c { 1176 i = 1 1177 } 1178 1179 for i < len(data) { 1180 length := helperFindEmphChar(data[i:], c) 1181 if length == 0 { 1182 return 0, nil 1183 } 1184 i += length 1185 if i >= len(data) { 1186 return 0, nil 1187 } 1188 1189 if i+1 < len(data) && data[i+1] == c { 1190 i++ 1191 continue 1192 } 1193 1194 if data[i] == c && !isspace(data[i-1]) { 1195 1196 if p.extensions&NoIntraEmphasis != 0 { 1197 if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) { 1198 continue 1199 } 1200 } 1201 1202 emph := NewNode(Emph) 1203 p.inline(emph, data[:i]) 1204 return i + 1, emph 1205 } 1206 } 1207 1208 return 0, nil 1209 } 1210 1211 func helperDoubleEmphasis(p *Markdown, data []byte, c byte) (int, *Node) { 1212 i := 0 1213 1214 for i < len(data) { 1215 length := helperFindEmphChar(data[i:], c) 1216 if length == 0 { 1217 return 0, nil 1218 } 1219 i += length 1220 1221 if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) { 1222 nodeType := Strong 1223 if c == '~' { 1224 nodeType = Del 1225 } 1226 node := NewNode(nodeType) 1227 p.inline(node, data[:i]) 1228 return i + 2, node 1229 } 1230 i++ 1231 } 1232 return 0, nil 1233 } 1234 1235 func helperTripleEmphasis(p *Markdown, data []byte, offset int, c byte) (int, *Node) { 1236 i := 0 1237 origData := data 1238 data = data[offset:] 1239 1240 for i < len(data) { 1241 length := helperFindEmphChar(data[i:], c) 1242 if length == 0 { 1243 return 0, nil 1244 } 1245 i += length 1246 1247 // skip whitespace preceded symbols 1248 if data[i] != c || isspace(data[i-1]) { 1249 continue 1250 } 1251 1252 switch { 1253 case i+2 < len(data) && data[i+1] == c && data[i+2] == c: 1254 // triple symbol found 1255 strong := NewNode(Strong) 1256 em := NewNode(Emph) 1257 strong.AppendChild(em) 1258 p.inline(em, data[:i]) 1259 return i + 3, strong 1260 case (i+1 < len(data) && data[i+1] == c): 1261 // double symbol found, hand over to emph1 1262 length, node := helperEmphasis(p, origData[offset-2:], c) 1263 if length == 0 { 1264 return 0, nil 1265 } 1266 return length - 2, node 1267 default: 1268 // single symbol found, hand over to emph2 1269 length, node := helperDoubleEmphasis(p, origData[offset-1:], c) 1270 if length == 0 { 1271 return 0, nil 1272 } 1273 return length - 1, node 1274 } 1275 } 1276 return 0, nil 1277 } 1278 1279 func text(s []byte) *Node { 1280 node := NewNode(Text) 1281 node.Literal = s 1282 return node 1283 } 1284 1285 func normalizeURI(s []byte) []byte { 1286 return s // TODO: implement 1287 }