[ dkforest ].git.dasho

inline.go (26889B)
      1 //
      2 // Blackfriday Markdown Processor
      3 // Available at http://github.com/russross/blackfriday
      4 //
      5 // Copyright © 2011 Russ Ross <russ@russross.com>.
      6 // Distributed under the Simplified BSD License.
      7 // See README.md for details.
      8 //
      9 
     10 //
     11 // Functions to parse inline elements.
     12 //
     13 
     14 package blackfriday
     15 
     16 import (
     17 	"bytes"
     18 	"regexp"
     19 	"strconv"
     20 )
     21 
var (
	// urlRe is a regular-expression fragment (not compiled on its own) that
	// matches either an http/https/ftp URL or a path starting with '/'.
	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
	// anchorRe matches, anchored at the start of its input, a complete
	// <a href="URL" [title="..."]>URL</a> element whose href and body both
	// match urlRe.
	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)

	// https://www.w3.org/TR/html5/syntax.html#character-references
	// Unicode has 17 planes of 2^16 code points each, 1,114,112 in total, so
	// a numeric reference needs at most 7 decimal digits or 6 hex digits.
	// Named entity references can be 2-31 characters, with stuff like &lt;
	// at one end and &CounterClockwiseContourIntegral; at the other. There
	// are also sometimes numbers at the end, although this isn't inherent
	// in the specification; there are never numbers anywhere else in
	// current character references, though; see &frac34; and &blk12;, etc.
	// https://www.w3.org/TR/html5/syntax.html#named-character-references
	//
	// entity := "&" (named group | number ref) ";"
	// named group := [a-zA-Z]{2,31}[0-9]{0,2}
	// number ref := "#" (dec ref | hex ref)
	// dec ref := [0-9]{1,7}
	// hex ref := ("x" | "X") [0-9a-fA-F]{1,6}
	htmlEntityRe = regexp.MustCompile(`&([a-zA-Z]{2,31}[0-9]{0,2}|#([0-9]{1,7}|[xX][0-9a-fA-F]{1,6}));`)
)
     43 
     44 // Functions to parse text within a block
     45 // Each function returns the number of chars taken care of
     46 // data is the complete block being rendered
     47 // offset is the number of valid chars before the current cursor
     48 
     49 func (p *Markdown) inline(currBlock *Node, data []byte) {
     50 	// handlers might call us recursively: enforce a maximum depth
     51 	if p.nesting >= p.maxNesting || len(data) == 0 {
     52 		return
     53 	}
     54 	p.nesting++
     55 	beg, end := 0, 0
     56 	for end < len(data) {
     57 		handler := p.inlineCallback[data[end]]
     58 		if handler != nil {
     59 			if consumed, node := handler(p, data, end); consumed == 0 {
     60 				// No action from the callback.
     61 				end++
     62 			} else {
     63 				// Copy inactive chars into the output.
     64 				currBlock.AppendChild(text(data[beg:end]))
     65 				if node != nil {
     66 					currBlock.AppendChild(node)
     67 				}
     68 				// Skip past whatever the callback used.
     69 				beg = end + consumed
     70 				end = beg
     71 			}
     72 		} else {
     73 			end++
     74 		}
     75 	}
     76 	if beg < len(data) {
     77 		if data[end-1] == '\n' {
     78 			end--
     79 		}
     80 		currBlock.AppendChild(text(data[beg:end]))
     81 	}
     82 	p.nesting--
     83 }
     84 
     85 func censored(p *Markdown, data []byte, offset int) (int, *Node) {
     86 	data = data[offset:]
     87 	c := data[0]
     88 
     89 	if len(data) > 2 && data[1] != c {
     90 		ret, node := helperCensored(p, data[1:], c)
     91 		if ret == 0 {
     92 			return 0, nil
     93 		}
     94 
     95 		return ret + 1, node
     96 	}
     97 
     98 	return 0, nil
     99 }
    100 
    101 // single and double emphasis parsing
    102 func emphasis(p *Markdown, data []byte, offset int) (int, *Node) {
    103 	data = data[offset:]
    104 	c := data[0]
    105 
    106 	if len(data) > 2 && data[1] != c {
    107 		// whitespace cannot follow an opening emphasis;
    108 		// strikethrough only takes two characters '~~'
    109 		if c == '~' || isspace(data[1]) {
    110 			return 0, nil
    111 		}
    112 		ret, node := helperEmphasis(p, data[1:], c)
    113 		if ret == 0 {
    114 			return 0, nil
    115 		}
    116 
    117 		return ret + 1, node
    118 	}
    119 
    120 	if len(data) > 3 && data[1] == c && data[2] != c {
    121 		if isspace(data[2]) {
    122 			return 0, nil
    123 		}
    124 		ret, node := helperDoubleEmphasis(p, data[2:], c)
    125 		if ret == 0 {
    126 			return 0, nil
    127 		}
    128 
    129 		return ret + 2, node
    130 	}
    131 
    132 	if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
    133 		if c == '~' || isspace(data[3]) {
    134 			return 0, nil
    135 		}
    136 		ret, node := helperTripleEmphasis(p, data, 3, c)
    137 		if ret == 0 {
    138 			return 0, nil
    139 		}
    140 
    141 		return ret + 3, node
    142 	}
    143 
    144 	return 0, nil
    145 }
    146 
    147 func codeSpan(p *Markdown, data []byte, offset int) (int, *Node) {
    148 	data = data[offset:]
    149 
    150 	nb := 0
    151 
    152 	// count the number of backticks in the delimiter
    153 	for nb < len(data) && data[nb] == '`' {
    154 		nb++
    155 	}
    156 
    157 	// find the next delimiter
    158 	i, end := 0, 0
    159 	for end = nb; end < len(data) && i < nb; end++ {
    160 		if data[end] == '`' {
    161 			i++
    162 		} else {
    163 			i = 0
    164 		}
    165 	}
    166 
    167 	// no matching delimiter?
    168 	if i < nb && end >= len(data) {
    169 		return 0, nil
    170 	}
    171 
    172 	// trim outside whitespace
    173 	fBegin := nb
    174 	for fBegin < end && data[fBegin] == ' ' {
    175 		fBegin++
    176 	}
    177 
    178 	fEnd := end - nb
    179 	for fEnd > fBegin && data[fEnd-1] == ' ' {
    180 		fEnd--
    181 	}
    182 
    183 	// render the code span
    184 	if fBegin != fEnd {
    185 		code := NewNode(Code)
    186 		code.Literal = data[fBegin:fEnd]
    187 		return end, code
    188 	}
    189 
    190 	return end, nil
    191 }
    192 
    193 // newline preceded by two spaces becomes <br>
    194 func maybeLineBreak(p *Markdown, data []byte, offset int) (int, *Node) {
    195 	origOffset := offset
    196 	for offset < len(data) && data[offset] == ' ' {
    197 		offset++
    198 	}
    199 
    200 	if offset < len(data) && data[offset] == '\n' {
    201 		if offset-origOffset >= 2 {
    202 			return offset - origOffset + 1, NewNode(Hardbreak)
    203 		}
    204 		return offset - origOffset, nil
    205 	}
    206 	return 0, nil
    207 }
    208 
    209 // newline without two spaces works when HardLineBreak is enabled
    210 func lineBreak(p *Markdown, data []byte, offset int) (int, *Node) {
    211 	if p.extensions&HardLineBreak != 0 {
    212 		return 1, NewNode(Hardbreak)
    213 	}
    214 	return 0, nil
    215 }
    216 
// linkType identifies which bracketed construct the link parser is handling.
type linkType int

const (
	// linkNormal is a regular link: [text](dest) or a reference form.
	linkNormal linkType = iota
	// linkImg is an image: ![alt](dest).
	linkImg
	// linkDeferredFootnote is a footnote reference of the form [^refId].
	linkDeferredFootnote
	// linkInlineFootnote is an inline footnote of the form ^[text].
	linkInlineFootnote
)
    225 
    226 func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
    227 	if t == linkDeferredFootnote {
    228 		return false
    229 	}
    230 	return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
    231 }
    232 
    233 func maybeImage(p *Markdown, data []byte, offset int) (int, *Node) {
    234 	if offset < len(data)-1 && data[offset+1] == '[' {
    235 		return link(p, data, offset)
    236 	}
    237 	return 0, nil
    238 }
    239 
    240 func maybeInlineFootnote(p *Markdown, data []byte, offset int) (int, *Node) {
    241 	if offset < len(data)-1 && data[offset+1] == '[' {
    242 		return link(p, data, offset)
    243 	}
    244 	return 0, nil
    245 }
    246 
// link parses a link, an image or a footnote starting at data[offset].
//
// The construct is classified first (see linkType), then the bracketed text
// is located, then one of three tails is parsed: an inline "(dest "title")",
// a reference "[id]", or a shortcut/footnote form with no tail. It returns
// the number of bytes consumed counted from the original offset, plus the
// resulting node, or (0, nil) when the input is not a valid construct.
func link(p *Markdown, data []byte, offset int) (int, *Node) {
	// no links allowed inside regular links, footnote, and deferred footnotes
	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
		return 0, nil
	}

	var t linkType
	switch {
	// special case: ![^text] == deferred footnote (that follows something with
	// an exclamation point)
	case p.extensions&Footnotes != 0 && len(data)-1 > offset && data[offset+1] == '^':
		t = linkDeferredFootnote
	// ![alt] == image
	case offset >= 0 && data[offset] == '!':
		t = linkImg
		offset++
	// ^[text] == inline footnote
	// [^refId] == deferred footnote
	case p.extensions&Footnotes != 0:
		if offset >= 0 && data[offset] == '^' {
			t = linkInlineFootnote
			offset++
		} else if len(data)-1 > offset && data[offset+1] == '^' {
			t = linkDeferredFootnote
		}
	// [text] == regular link
	default:
		t = linkNormal
	}

	// From here on data starts at the opening '['; for images and inline
	// footnotes the one-byte prefix ('!' or '^') has already been skipped.
	data = data[offset:]

	var (
		i                       = 1
		noteID                  int
		title, link, altContent []byte
		textHasNl               = false
	)

	// skip the '^' of a deferred footnote
	if t == linkDeferredFootnote {
		i++
	}

	// look for the matching closing bracket
	for level := 1; level > 0 && i < len(data); i++ {
		switch {
		case data[i] == '\n':
			textHasNl = true

		case isBackslashEscaped(data, i):
			continue

		case data[i] == '[':
			level++

		case data[i] == ']':
			level--
			if level <= 0 {
				i-- // compensate for extra i++ in for loop
			}
		}
	}

	// no closing bracket found
	if i >= len(data) {
		return 0, nil
	}

	// txtE is the index of the closing ']' of the text part
	txtE := i
	i++
	var footnoteNode *Node

	// skip any amount of whitespace or newline
	// (this is much more lax than original markdown syntax)
	for i < len(data) && isspace(data[i]) {
		i++
	}

	// inline style link
	switch {
	case i < len(data) && data[i] == '(':
		// skip initial whitespace
		i++

		for i < len(data) && isspace(data[i]) {
			i++
		}

		linkB := i

		// look for link end: ' " )
	findlinkend:
		for i < len(data) {
			switch {
			case data[i] == '\\':
				// a backslash skips the byte that follows it
				i += 2

			case data[i] == ')' || data[i] == '\'' || data[i] == '"':
				break findlinkend

			default:
				i++
			}
		}

		if i >= len(data) {
			return 0, nil
		}
		linkE := i

		// look for title end if present
		titleB, titleE := 0, 0
		if data[i] == '\'' || data[i] == '"' {
			i++
			titleB = i

		findtitleend:
			for i < len(data) {
				switch {
				case data[i] == '\\':
					i += 2

				case data[i] == ')':
					break findtitleend

				default:
					i++
				}
			}

			if i >= len(data) {
				return 0, nil
			}

			// skip whitespace after title
			titleE = i - 1
			for titleE > titleB && isspace(data[titleE]) {
				titleE--
			}

			// check for closing quote presence; without one the quoted part
			// is treated as part of the link instead of a title
			if data[titleE] != '\'' && data[titleE] != '"' {
				titleB, titleE = 0, 0
				linkE = i
			}
		}

		// remove whitespace at the end of the link
		for linkE > linkB && isspace(data[linkE-1]) {
			linkE--
		}

		// remove optional angle brackets around the link
		if data[linkB] == '<' {
			linkB++
		}
		if data[linkE-1] == '>' {
			linkE--
		}

		// build escaped link and title
		if linkE > linkB {
			link = data[linkB:linkE]
		}

		if titleE > titleB {
			title = data[titleB:titleE]
		}

		// step past the closing ')'
		i++

	// reference style link
	case isReferenceStyleLink(data, i, t):
		var id []byte
		altContentConsidered := false

		// look for the id
		i++
		linkB := i
		for i < len(data) && data[i] != ']' {
			i++
		}
		if i >= len(data) {
			return 0, nil
		}
		linkE := i

		// find the reference
		if linkB == linkE {
			// empty "[]": the link text itself is the id
			if textHasNl {
				var b bytes.Buffer

				// copy the text, replacing each newline with a single space
				// unless the byte before it is already a space
				for j := 1; j < txtE; j++ {
					switch {
					case data[j] != '\n':
						b.WriteByte(data[j])
					case data[j-1] != ' ':
						b.WriteByte(' ')
					}
				}

				id = b.Bytes()
			} else {
				id = data[1:txtE]
				altContentConsidered = true
			}
		} else {
			id = data[linkB:linkE]
		}

		// find the reference with matching id
		lr, ok := p.getRef(string(id))
		if !ok {
			return 0, nil
		}

		// keep link and title from reference
		link = lr.link
		title = lr.title
		if altContentConsidered {
			altContent = lr.text
		}
		// step past the closing ']' of the id
		i++

	// shortcut reference style link or reference or inline footnote
	default:
		var id []byte

		// craft the id
		if textHasNl {
			var b bytes.Buffer

			// same newline folding as in the reference-style branch
			for j := 1; j < txtE; j++ {
				switch {
				case data[j] != '\n':
					b.WriteByte(data[j])
				case data[j-1] != ' ':
					b.WriteByte(' ')
				}
			}

			id = b.Bytes()
		} else {
			if t == linkDeferredFootnote {
				id = data[2:txtE] // get rid of the ^
			} else {
				id = data[1:txtE]
			}
		}

		footnoteNode = NewNode(Item)
		if t == linkInlineFootnote {
			// create a new reference
			noteID = len(p.notes) + 1

			// The fragment buffer is sized from id, capped at 16 bytes, and
			// then filled from slugify(id); copy stops at the shorter of the
			// two lengths.
			var fragment []byte
			if len(id) > 0 {
				if len(id) < 16 {
					fragment = make([]byte, len(id))
				} else {
					fragment = make([]byte, 16)
				}
				copy(fragment, slugify(id))
			} else {
				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteID))...)
			}

			ref := &reference{
				noteID:   noteID,
				hasBlock: false,
				link:     fragment,
				title:    id,
				footnote: footnoteNode,
			}

			p.notes = append(p.notes, ref)

			link = ref.link
			title = ref.title
		} else {
			// find the reference with matching id
			lr, ok := p.getRef(string(id))
			if !ok {
				return 0, nil
			}

			// a deferred footnote is numbered and recorded on first use here
			if t == linkDeferredFootnote {
				lr.noteID = len(p.notes) + 1
				lr.footnote = footnoteNode
				p.notes = append(p.notes, lr)
			}

			// keep link and title from reference
			link = lr.link
			// if inline footnote, title == footnote contents
			title = lr.title
			noteID = lr.noteID
		}

		// rewind the whitespace
		i = txtE + 1
	}

	var uLink []byte
	if t == linkNormal || t == linkImg {
		if len(link) > 0 {
			var uLinkBuf bytes.Buffer
			unescapeText(&uLinkBuf, link)
			uLink = uLinkBuf.Bytes()
		}

		// links need something to click on and somewhere to go
		if len(uLink) == 0 || (t == linkNormal && txtE <= 1) {
			return 0, nil
		}
	}

	// call the relevant rendering function
	var linkNode *Node
	switch t {
	case linkNormal:
		linkNode = NewNode(Link)
		linkNode.Destination = normalizeURI(uLink)
		linkNode.Title = title
		if len(altContent) > 0 {
			linkNode.AppendChild(text(altContent))
		} else {
			// links cannot contain other links, so turn off link parsing
			// temporarily and recurse
			insideLink := p.insideLink
			p.insideLink = true
			p.inline(linkNode, data[1:txtE])
			p.insideLink = insideLink
		}

	case linkImg:
		linkNode = NewNode(Image)
		linkNode.Destination = uLink
		linkNode.Title = title
		linkNode.AppendChild(text(data[1:txtE]))
		// account for the '!' skipped before data was re-sliced
		i++

	case linkInlineFootnote, linkDeferredFootnote:
		linkNode = NewNode(Link)
		linkNode.Destination = link
		linkNode.Title = title
		linkNode.NoteID = noteID
		linkNode.Footnote = footnoteNode
		if t == linkInlineFootnote {
			// account for the '^' skipped before data was re-sliced
			i++
		}

	default:
		return 0, nil
	}

	return i, linkNode
}
    605 
    606 func (p *Markdown) inlineHTMLComment(data []byte) int {
    607 	if len(data) < 5 {
    608 		return 0
    609 	}
    610 	if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
    611 		return 0
    612 	}
    613 	i := 5
    614 	// scan for an end-of-comment marker, across lines if necessary
    615 	for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
    616 		i++
    617 	}
    618 	// no end-of-comment marker
    619 	if i >= len(data) {
    620 		return 0
    621 	}
    622 	return i + 1
    623 }
    624 
    625 func stripMailto(link []byte) []byte {
    626 	if bytes.HasPrefix(link, []byte("mailto://")) {
    627 		return link[9:]
    628 	} else if bytes.HasPrefix(link, []byte("mailto:")) {
    629 		return link[7:]
    630 	} else {
    631 		return link
    632 	}
    633 }
    634 
// autolinkType specifies a kind of autolink that gets detected.
type autolinkType int

// These are the possible flag values for the autolink renderer.
const (
	// notAutolink: the input is not an autolink.
	notAutolink autolinkType = iota
	// normalAutolink: a "<scheme:...>" style autolink.
	normalAutolink
	// emailAutolink: a "<user@host>" style autolink.
	emailAutolink
)
    644 
    645 // '<' when tags or autolinks are allowed
    646 func leftAngle(p *Markdown, data []byte, offset int) (int, *Node) {
    647 	data = data[offset:]
    648 	altype, end := tagLength(data)
    649 	if size := p.inlineHTMLComment(data); size > 0 {
    650 		end = size
    651 	}
    652 	if end > 2 {
    653 		if altype != notAutolink {
    654 			var uLink bytes.Buffer
    655 			unescapeText(&uLink, data[1:end+1-2])
    656 			if uLink.Len() > 0 {
    657 				link := uLink.Bytes()
    658 				node := NewNode(Link)
    659 				node.Destination = link
    660 				if altype == emailAutolink {
    661 					node.Destination = append([]byte("mailto:"), link...)
    662 				}
    663 				node.AppendChild(text(stripMailto(link)))
    664 				return end, node
    665 			}
    666 		} else {
    667 			htmlTag := NewNode(HTMLSpan)
    668 			htmlTag.Literal = data[:end]
    669 			return end, htmlTag
    670 		}
    671 	}
    672 
    673 	return end, nil
    674 }
    675 
    676 // '\\' backslash escape
    677 var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~")
    678 
    679 func escape(p *Markdown, data []byte, offset int) (int, *Node) {
    680 	data = data[offset:]
    681 
    682 	if len(data) > 1 {
    683 		if p.extensions&ManualLineBreak != 0 && data[1] == 'n' {
    684 			return 2, NewNode(Hardbreak)
    685 		}
    686 		if p.extensions&BackslashLineBreak != 0 && data[1] == '\n' {
    687 			return 2, NewNode(Hardbreak)
    688 		}
    689 		if bytes.IndexByte(escapeChars, data[1]) < 0 {
    690 			return 0, nil
    691 		}
    692 
    693 		return 2, text(data[1:2])
    694 	}
    695 
    696 	return 2, nil
    697 }
    698 
    699 func unescapeText(ob *bytes.Buffer, src []byte) {
    700 	i := 0
    701 	for i < len(src) {
    702 		org := i
    703 		for i < len(src) && src[i] != '\\' {
    704 			i++
    705 		}
    706 
    707 		if i > org {
    708 			ob.Write(src[org:i])
    709 		}
    710 
    711 		if i+1 >= len(src) {
    712 			break
    713 		}
    714 
    715 		ob.WriteByte(src[i+1])
    716 		i += 2
    717 	}
    718 }
    719 
    720 // '&' escaped when it doesn't belong to an entity
    721 // valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
    722 func entity(p *Markdown, data []byte, offset int) (int, *Node) {
    723 	data = data[offset:]
    724 
    725 	end := 1
    726 
    727 	if end < len(data) && data[end] == '#' {
    728 		end++
    729 	}
    730 
    731 	for end < len(data) && isalnum(data[end]) {
    732 		end++
    733 	}
    734 
    735 	if end < len(data) && data[end] == ';' {
    736 		end++ // real entity
    737 	} else {
    738 		return 0, nil // lone '&'
    739 	}
    740 
    741 	ent := data[:end]
    742 	// undo &amp; escaping or it will be converted to &amp;amp; by another
    743 	// escaper in the renderer
    744 	if bytes.Equal(ent, []byte("&amp;")) {
    745 		ent = []byte{'&'}
    746 	}
    747 
    748 	return end, text(ent)
    749 }
    750 
    751 func linkEndsWithEntity(data []byte, linkEnd int) bool {
    752 	entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1)
    753 	return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
    754 }
    755 
    756 // hasPrefixCaseInsensitive is a custom implementation of
    757 //     strings.HasPrefix(strings.ToLower(s), prefix)
    758 // we rolled our own because ToLower pulls in a huge machinery of lowercasing
    759 // anything from Unicode and that's very slow. Since this func will only be
    760 // used on ASCII protocol prefixes, we can take shortcuts.
    761 func hasPrefixCaseInsensitive(s, prefix []byte) bool {
    762 	if len(s) < len(prefix) {
    763 		return false
    764 	}
    765 	delta := byte('a' - 'A')
    766 	for i, b := range prefix {
    767 		if b != s[i] && b != s[i]+delta {
    768 			return false
    769 		}
    770 	}
    771 	return true
    772 }
    773 
// protocolPrefixes are the lowercase scheme prefixes that maybeAutoLink
// checks (case-insensitively) before running the more expensive autoLink.
// This list is only a pre-filter: autoLink additionally validates the
// candidate with isSafeLink, which uses its own list (validUris).
var protocolPrefixes = [][]byte{
	[]byte("http://"),
	[]byte("https://"),
	[]byte("ftp://"),
	[]byte("file://"),
	[]byte("mailto:"),
}

// shortestPrefix is the minimum number of bytes that must remain in the
// input for any of protocolPrefixes to fit.
const shortestPrefix = 6 // len("ftp://"), the shortest of the above
    783 
    784 func maybeAutoLink(p *Markdown, data []byte, offset int) (int, *Node) {
    785 	// quick check to rule out most false hits
    786 	if p.insideLink || len(data) < offset+shortestPrefix {
    787 		return 0, nil
    788 	}
    789 	for _, prefix := range protocolPrefixes {
    790 		endOfHead := offset + 8 // 8 is the len() of the longest prefix
    791 		if endOfHead > len(data) {
    792 			endOfHead = len(data)
    793 		}
    794 		if hasPrefixCaseInsensitive(data[offset:endOfHead], prefix) {
    795 			return autoLink(p, data, offset)
    796 		}
    797 	}
    798 	return 0, nil
    799 }
    800 
// autoLink tries to turn the bare URL starting at data[offset] into a Link
// node. It returns the number of bytes consumed and the node, or (0, nil)
// when the text is not accepted as a link.
//
// If the URL turns out to sit inside a literal <a href="...">...</a> element
// (as matched by anchorRe), the rest of that element from offset onward is
// emitted verbatim as an HTMLSpan instead.
func autoLink(p *Markdown, data []byte, offset int) (int, *Node) {
	// Now a more expensive check to see if we're not inside an anchor element
	anchorStart := offset
	offsetFromAnchor := 0
	// walk back to the nearest '<' (or to the start of data)
	for anchorStart > 0 && data[anchorStart] != '<' {
		anchorStart--
		offsetFromAnchor++
	}

	anchorStr := anchorRe.Find(data[anchorStart:])
	if anchorStr != nil {
		// emit only the part of the anchor from offset onward
		anchorClose := NewNode(HTMLSpan)
		anchorClose.Literal = anchorStr[offsetFromAnchor:]
		return len(anchorStr) - offsetFromAnchor, anchorClose
	}

	// scan backward for a word boundary
	rewind := 0
	for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
		rewind++
	}
	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
		return 0, nil
	}

	// keep the full buffer for the backward delimiter scan below; data now
	// starts at the beginning of the word containing offset
	origData := data
	data = data[offset-rewind:]

	if !isSafeLink(data) {
		return 0, nil
	}

	// the link runs up to the first byte for which isEndOfLink is true
	linkEnd := 0
	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
		linkEnd++
	}

	// Skip punctuation at the end of the link
	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
		linkEnd--
	}

	// But don't skip semicolon if it's a part of escaped entity:
	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
		linkEnd--
	}

	// See if the link finishes with a punctuation sign that can be closed.
	var copen byte
	switch data[linkEnd-1] {
	case '"':
		copen = '"'
	case '\'':
		copen = '\''
	case ')':
		copen = '('
	case ']':
		copen = '['
	case '}':
		copen = '{'
	default:
		copen = 0
	}

	if copen != 0 {
		// index in origData of the byte just before the closing delimiter
		bufEnd := offset - rewind + linkEnd - 2

		openDelim := 1

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric))
		 *
		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		// walk backward through the current line, counting closers up and
		// openers down, until the count balances
		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
			if origData[bufEnd] == data[linkEnd-1] {
				openDelim++
			}

			if origData[bufEnd] == copen {
				openDelim--
			}

			bufEnd--
		}

		// balanced: the trailing delimiter is dropped from the link
		if openDelim == 0 {
			linkEnd--
		}
	}

	var uLink bytes.Buffer
	unescapeText(&uLink, data[:linkEnd])

	if uLink.Len() > 0 {
		node := NewNode(Link)
		node.Destination = uLink.Bytes()
		node.AppendChild(text(uLink.Bytes()))
		return linkEnd, node
	}

	return linkEnd, nil
}
    919 
    920 func isEndOfLink(char byte) bool {
    921 	return isspace(char) || char == '<'
    922 }
    923 
// validUris are the lowercase scheme prefixes isSafeLink accepts; the
// comparison there lowercases the candidate first.
var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}

// validPaths are the path prefixes isSafeLink accepts; they are compared
// exactly (case-sensitively).
var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")}
    926 
    927 func isSafeLink(link []byte) bool {
    928 	for _, path := range validPaths {
    929 		if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) {
    930 			if len(link) == len(path) {
    931 				return true
    932 			} else if isalnum(link[len(path)]) {
    933 				return true
    934 			}
    935 		}
    936 	}
    937 
    938 	for _, prefix := range validUris {
    939 		// TODO: handle unicode here
    940 		// case-insensitive prefix test
    941 		if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
    942 			return true
    943 		}
    944 	}
    945 
    946 	return false
    947 }
    948 
    949 // return the length of the given tag, or 0 is it's not valid
    950 func tagLength(data []byte) (autolink autolinkType, end int) {
    951 	var i, j int
    952 
    953 	// a valid tag can't be shorter than 3 chars
    954 	if len(data) < 3 {
    955 		return notAutolink, 0
    956 	}
    957 
    958 	// begins with a '<' optionally followed by '/', followed by letter or number
    959 	if data[0] != '<' {
    960 		return notAutolink, 0
    961 	}
    962 	if data[1] == '/' {
    963 		i = 2
    964 	} else {
    965 		i = 1
    966 	}
    967 
    968 	if !isalnum(data[i]) {
    969 		return notAutolink, 0
    970 	}
    971 
    972 	// scheme test
    973 	autolink = notAutolink
    974 
    975 	// try to find the beginning of an URI
    976 	for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
    977 		i++
    978 	}
    979 
    980 	if i > 1 && i < len(data) && data[i] == '@' {
    981 		if j = isMailtoAutoLink(data[i:]); j != 0 {
    982 			return emailAutolink, i + j
    983 		}
    984 	}
    985 
    986 	if i > 2 && i < len(data) && data[i] == ':' {
    987 		autolink = normalAutolink
    988 		i++
    989 	}
    990 
    991 	// complete autolink test: no whitespace or ' or "
    992 	switch {
    993 	case i >= len(data):
    994 		autolink = notAutolink
    995 	case autolink != notAutolink:
    996 		j = i
    997 
    998 		for i < len(data) {
    999 			if data[i] == '\\' {
   1000 				i += 2
   1001 			} else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
   1002 				break
   1003 			} else {
   1004 				i++
   1005 			}
   1006 
   1007 		}
   1008 
   1009 		if i >= len(data) {
   1010 			return autolink, 0
   1011 		}
   1012 		if i > j && data[i] == '>' {
   1013 			return autolink, i + 1
   1014 		}
   1015 
   1016 		// one of the forbidden chars has been found
   1017 		autolink = notAutolink
   1018 	}
   1019 	i += bytes.IndexByte(data[i:], '>')
   1020 	if i < 0 {
   1021 		return autolink, 0
   1022 	}
   1023 	return autolink, i + 1
   1024 }
   1025 
   1026 // look for the address part of a mail autolink and '>'
   1027 // this is less strict than the original markdown e-mail address matching
   1028 func isMailtoAutoLink(data []byte) int {
   1029 	nb := 0
   1030 
   1031 	// address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
   1032 	for i := 0; i < len(data); i++ {
   1033 		if isalnum(data[i]) {
   1034 			continue
   1035 		}
   1036 
   1037 		switch data[i] {
   1038 		case '@':
   1039 			nb++
   1040 
   1041 		case '-', '.', '_':
   1042 			break
   1043 
   1044 		case '>':
   1045 			if nb == 1 {
   1046 				return i + 1
   1047 			}
   1048 			return 0
   1049 		default:
   1050 			return 0
   1051 		}
   1052 	}
   1053 
   1054 	return 0
   1055 }
   1056 
   1057 // look for the next emph char, skipping other constructs
   1058 func helperFindEmphChar(data []byte, c byte) int {
   1059 	i := 0
   1060 
   1061 	for i < len(data) {
   1062 		for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
   1063 			i++
   1064 		}
   1065 		if i >= len(data) {
   1066 			return 0
   1067 		}
   1068 		// do not count escaped chars
   1069 		if i != 0 && data[i-1] == '\\' {
   1070 			i++
   1071 			continue
   1072 		}
   1073 		if data[i] == c {
   1074 			return i
   1075 		}
   1076 
   1077 		if data[i] == '`' {
   1078 			// skip a code span
   1079 			tmpI := 0
   1080 			i++
   1081 			for i < len(data) && data[i] != '`' {
   1082 				if tmpI == 0 && data[i] == c {
   1083 					tmpI = i
   1084 				}
   1085 				i++
   1086 			}
   1087 			if i >= len(data) {
   1088 				return tmpI
   1089 			}
   1090 			i++
   1091 		} else if data[i] == '[' {
   1092 			// skip a link
   1093 			tmpI := 0
   1094 			i++
   1095 			for i < len(data) && data[i] != ']' {
   1096 				if tmpI == 0 && data[i] == c {
   1097 					tmpI = i
   1098 				}
   1099 				i++
   1100 			}
   1101 			i++
   1102 			for i < len(data) && (data[i] == ' ' || data[i] == '\n') {
   1103 				i++
   1104 			}
   1105 			if i >= len(data) {
   1106 				return tmpI
   1107 			}
   1108 			if data[i] != '[' && data[i] != '(' { // not a link
   1109 				if tmpI > 0 {
   1110 					return tmpI
   1111 				}
   1112 				continue
   1113 			}
   1114 			cc := data[i]
   1115 			i++
   1116 			for i < len(data) && data[i] != cc {
   1117 				if tmpI == 0 && data[i] == c {
   1118 					return i
   1119 				}
   1120 				i++
   1121 			}
   1122 			if i >= len(data) {
   1123 				return tmpI
   1124 			}
   1125 			i++
   1126 		}
   1127 	}
   1128 	return 0
   1129 }
   1130 
   1131 func helperCensored(p *Markdown, data []byte, c byte) (int, *Node) {
   1132 	i := 0
   1133 
   1134 	// skip one symbol if coming from emph3
   1135 	if len(data) > 1 && data[0] == c && data[1] == c {
   1136 		i = 1
   1137 	}
   1138 
   1139 	for i < len(data) {
   1140 		length := helperFindEmphChar(data[i:], c)
   1141 		if length == 0 {
   1142 			return 0, nil
   1143 		}
   1144 		i += length
   1145 		if i >= len(data) {
   1146 			return 0, nil
   1147 		}
   1148 
   1149 		if i+1 < len(data) && data[i+1] == c {
   1150 			i++
   1151 			continue
   1152 		}
   1153 
   1154 		if data[i] == c && !isspace(data[i-1]) {
   1155 
   1156 			if p.extensions&NoIntraEmphasis != 0 {
   1157 				if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
   1158 					continue
   1159 				}
   1160 			}
   1161 
   1162 			emph := NewNode(Censored)
   1163 			p.inline(emph, data[:i])
   1164 			return i + 1, emph
   1165 		}
   1166 	}
   1167 
   1168 	return 0, nil
   1169 }
   1170 
   1171 func helperEmphasis(p *Markdown, data []byte, c byte) (int, *Node) {
   1172 	i := 0
   1173 
   1174 	// skip one symbol if coming from emph3
   1175 	if len(data) > 1 && data[0] == c && data[1] == c {
   1176 		i = 1
   1177 	}
   1178 
   1179 	for i < len(data) {
   1180 		length := helperFindEmphChar(data[i:], c)
   1181 		if length == 0 {
   1182 			return 0, nil
   1183 		}
   1184 		i += length
   1185 		if i >= len(data) {
   1186 			return 0, nil
   1187 		}
   1188 
   1189 		if i+1 < len(data) && data[i+1] == c {
   1190 			i++
   1191 			continue
   1192 		}
   1193 
   1194 		if data[i] == c && !isspace(data[i-1]) {
   1195 
   1196 			if p.extensions&NoIntraEmphasis != 0 {
   1197 				if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
   1198 					continue
   1199 				}
   1200 			}
   1201 
   1202 			emph := NewNode(Emph)
   1203 			p.inline(emph, data[:i])
   1204 			return i + 1, emph
   1205 		}
   1206 	}
   1207 
   1208 	return 0, nil
   1209 }
   1210 
   1211 func helperDoubleEmphasis(p *Markdown, data []byte, c byte) (int, *Node) {
   1212 	i := 0
   1213 
   1214 	for i < len(data) {
   1215 		length := helperFindEmphChar(data[i:], c)
   1216 		if length == 0 {
   1217 			return 0, nil
   1218 		}
   1219 		i += length
   1220 
   1221 		if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
   1222 			nodeType := Strong
   1223 			if c == '~' {
   1224 				nodeType = Del
   1225 			}
   1226 			node := NewNode(nodeType)
   1227 			p.inline(node, data[:i])
   1228 			return i + 2, node
   1229 		}
   1230 		i++
   1231 	}
   1232 	return 0, nil
   1233 }
   1234 
   1235 func helperTripleEmphasis(p *Markdown, data []byte, offset int, c byte) (int, *Node) {
   1236 	i := 0
   1237 	origData := data
   1238 	data = data[offset:]
   1239 
   1240 	for i < len(data) {
   1241 		length := helperFindEmphChar(data[i:], c)
   1242 		if length == 0 {
   1243 			return 0, nil
   1244 		}
   1245 		i += length
   1246 
   1247 		// skip whitespace preceded symbols
   1248 		if data[i] != c || isspace(data[i-1]) {
   1249 			continue
   1250 		}
   1251 
   1252 		switch {
   1253 		case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
   1254 			// triple symbol found
   1255 			strong := NewNode(Strong)
   1256 			em := NewNode(Emph)
   1257 			strong.AppendChild(em)
   1258 			p.inline(em, data[:i])
   1259 			return i + 3, strong
   1260 		case (i+1 < len(data) && data[i+1] == c):
   1261 			// double symbol found, hand over to emph1
   1262 			length, node := helperEmphasis(p, origData[offset-2:], c)
   1263 			if length == 0 {
   1264 				return 0, nil
   1265 			}
   1266 			return length - 2, node
   1267 		default:
   1268 			// single symbol found, hand over to emph2
   1269 			length, node := helperDoubleEmphasis(p, origData[offset-1:], c)
   1270 			if length == 0 {
   1271 				return 0, nil
   1272 			}
   1273 			return length - 1, node
   1274 		}
   1275 	}
   1276 	return 0, nil
   1277 }
   1278 
   1279 func text(s []byte) *Node {
   1280 	node := NewNode(Text)
   1281 	node.Literal = s
   1282 	return node
   1283 }
   1284 
   1285 func normalizeURI(s []byte) []byte {
   1286 	return s // TODO: implement
   1287 }
	dkforest A forum and chat platform (onion)
	git clone https://git.dasho.dev/n0tr1v/dkforest.git
	Log \| Files \| Refs \| LICENSE