neovim

Neovim text editor
git clone https://git.dasho.dev/neovim.git
Log | Files | Refs | README

dtd.vim (12072B)


      1 " Vim indent file
      2 " Language:		DTD (Document Type Definition for XML)
      3 " Maintainer:		Doug Kearns <dougkearns@gmail.com>
      4 " Previous Maintainer:	Nikolai Weibull <now@bitwi.se>
      5 " Last Change:		24 Sep 2021
      6 
      7 " Only load this indent file when no other was loaded.
      8 if exists("b:did_indent")
      9  finish
     10 endif
     11 let b:did_indent = 1
     12 
     13 setlocal indentexpr=GetDTDIndent()
     14 setlocal indentkeys=!^F,o,O,>
     15 setlocal nosmartindent
     16 
     17 let b:undo_indent = "setl inde< indk< si<"
     18 
     19 if exists("*GetDTDIndent")
     20  finish
     21 endif
     22 
     23 let s:cpo_save = &cpo
     24 set cpo&vim
     25 
     26 " TODO: Needs to be adjusted to stop at [, <, and ].
     27 let s:token_pattern = '^[^[:space:]]\+'
     28 
     29 function s:lex1(input, start, ...)
     30  let pattern = a:0 > 0 ? a:1 : s:token_pattern
     31  let start = matchend(a:input, '^\_s*', a:start)
     32  if start == -1
     33    return ["", a:start]
     34  endif
     35  let end = matchend(a:input, pattern, start)
     36  if end == -1
     37    return ["", a:start]
     38  endif
     39  let token = strpart(a:input, start, end - start)
     40  return [token, end]
     41 endfunction
     42 
     43 function s:lex(input, start, ...)
     44  let pattern = a:0 > 0 ? a:1 : s:token_pattern
     45  let info = s:lex1(a:input, a:start, pattern)
     46  while info[0] == '--'
     47    let info = s:lex1(a:input, info[1], pattern)
     48    while info[0] != "" && info[0] != '--'
     49      let info = s:lex1(a:input, info[1], pattern)
     50    endwhile
     51    if info[0] == ""
     52      return info
     53    endif
     54    let info = s:lex1(a:input, info[1], pattern)
     55  endwhile
     56  return info
     57 endfunction
     58 
     59 function s:indent_to_innermost_parentheses(line, end)
     60  let token = '('
     61  let end = a:end
     62  let parentheses = [end - 1]
     63  while token != ""
     64    let [token, end] = s:lex(a:line, end, '^\%([(),|]\|[A-Za-z0-9_-]\+\|#P\=CDATA\|%[A-Za-z0-9_-]\+;\)[?*+]\=')
     65    if token[0] == '('
     66      call add(parentheses, end - 1)
     67    elseif token[0] == ')'
     68      if len(parentheses) == 1
     69        return [-1, end]
     70      endif
     71      call remove(parentheses, -1)
     72    endif
     73  endwhile
     74  return [parentheses[-1] - strridx(a:line, "\n", parentheses[-1]), end]
     75 endfunction
     76 
     77 " TODO: Line and end could be script global (think OO members).
     78 function GetDTDIndent()
     79  if v:lnum == 1
     80    return 0
     81  endif
     82  
     83  " Begin by searching back for a <! that isn’t inside a comment.
     84  " From here, depending on what follows immediately after, parse to
     85  " where we’re at to determine what to do.
     86  if search('<!', 'bceW') == 0
     87    return indent(v:lnum - 1)
     88  endif
     89  let lnum = line('.')
     90  let col = col('.')
     91  let indent = indent('.')
     92  let line = lnum == v:lnum ? getline(lnum) : join(getline(lnum, v:lnum - 1), "\n")
     93 
     94  let [declaration, end] = s:lex1(line, col)
     95  if declaration == ""
     96    return indent + shiftwidth()
     97  elseif declaration == '--'
     98    " We’re looking at a comment.  Now, simply determine if the comment is
     99    " terminated or not.  If it isn’t, let Vim take care of that using
    100    " 'comments' and 'autoindent'. Otherwise, indent to the first lines level.
    101    while declaration != ""
    102      let [declaration, end] = s:lex(line, end)
    103      if declaration == "-->"
    104        return indent
    105      endif
    106    endwhile
    107    return -1
    108  elseif declaration == 'ELEMENT'
    109    " Check for element name.  If none exists, indent one level.
    110    let [name, end] = s:lex(line, end)
    111    if name == ""
    112      return indent + shiftwidth()
    113    endif
    114 
    115    " Check for token following element name.  This can be a specification of
    116    " whether the start or end tag may be omitted.  If nothing is found, indent
    117    " one level.
    118    let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)')
    119    let n = 0
    120    while token =~ '[-O]' && n < 2
    121      let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)')
    122      let n += 1
    123    endwhile
    124    if token == ""
    125      return indent + shiftwidth()
    126    endif
    127 
    128    " Next comes the content model.  If the token we’ve found isn’t a
    129    " parenthesis it must be either ANY, EMPTY or some random junk.  Either
    130    " way, we’re done indenting this element, so set it to that of the first
    131    " line so that the terminating “>” winds up having the same indentation.
    132    if token != '('
    133      return indent
    134    endif
    135 
    136    " Now go through the content model.  We need to keep track of the nesting
    137    " of parentheses.  As soon as we hit 0 we’re done.  If that happens we must
    138    " have a complete content model.  Thus set indentation to be the same as that
    139    " of the first line so that the terminating “>” winds up having the same
    140    " indentation.  Otherwise, we’ll indent to the innermost parentheses not yet
    141    " matched.
    142    let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
    143    if indent_of_innermost != -1
    144      return indent_of_innermost
    145    endif
    146 
    147    " Finally, look for any additions and/or exceptions to the content model.
    148    " This is defined by a “+” or “-” followed by another content model
    149    " declaration.
    150    " TODO: Can the “-” be separated by whitespace from the “(”?
    151    let seen = { '+(': 0, '-(': 0 }
    152    while 1
    153      let [additions_exceptions, end] = s:lex(line, end, '^[+-](')
    154      if additions_exceptions != '+(' && additions_exceptions != '-('
    155        let [token, end] = s:lex(line, end)
    156        if token == '>'
    157          return indent
    158        endif
    159        " TODO: Should use s:lex here on getline(v:lnum) and check for >.
    160        return getline(v:lnum) =~ '^\s*>' || count(values(seen), 0) == 0 ? indent : (indent + shiftwidth())
    161      endif
    162 
    163      " If we’ve seen an addition or exception already and this is of the same
    164      " kind, the user is writing a broken DTD.  Time to bail.
    165      if seen[additions_exceptions]
    166        return indent
    167      endif
    168      let seen[additions_exceptions] = 1
    169 
    170      let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
    171      if indent_of_innermost != -1
    172        return indent_of_innermost
    173      endif
    174    endwhile
    175  elseif declaration == 'ATTLIST'
    176    " Check for element name.  If none exists, indent one level.
    177    let [name, end] = s:lex(line, end)
    178    if name == ""
    179      return indent + shiftwidth()
    180    endif
    181 
    182    " Check for any number of attributes.
    183    while 1
    184      " Check for attribute name.  If none exists, indent one level, unless the
    185      " current line is a lone “>”, in which case we indent to the same level
    186      " as the first line.  Otherwise, if the attribute name is “>”, we have
    187      " actually hit the end of the attribute list, in which case we indent to
    188      " the same level as the first line.
    189      let [name, end] = s:lex(line, end)
    190      if name == ""
    191        " TODO: Should use s:lex here on getline(v:lnum) and check for >.
    192        return getline(v:lnum) =~ '^\s*>' ? indent : (indent + shiftwidth())
    193      elseif name == ">"
    194        return indent
    195      endif
    196 
    197      " Check for attribute value declaration.  If none exists, indent two
    198      " levels.  Otherwise, if it’s an enumerated value, check for nested
    199      " parentheses and indent to the innermost one if we don’t reach the end
    200      " of the listc.  Otherwise, just continue with looking for the default
    201      " attribute value.
    202      " TODO: Do validation of keywords
    203      " (CDATA|NMTOKEN|NMTOKENS|ID|IDREF|IDREFS|ENTITY|ENTITIES)?
    204      let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
    205      if value == ""
    206        return indent + shiftwidth() * 2
    207      elseif value == 'NOTATION'
    208        " If this is a enumerated value based on notations, read another token
    209        " for the actual value.  If it doesn’t exist, indent three levels.
    210        " TODO: If validating according to above, value must be equal to '('.
    211        let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
    212        if value == ""
    213          return indent + shiftwidth() * 3
    214        endif
    215      endif
    216 
    217      if value == '('
    218        let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
    219        if indent_of_innermost != -1
    220          return indent_of_innermost
    221        endif
    222      endif
    223 
    224      " Finally look for the attribute’s default value.  If non exists, indent
    225      " two levels.
    226      let [default, end] = s:lex(line, end, '^\%("\_[^"]*"\|#\(REQUIRED\|IMPLIED\|FIXED\)\)')
    227      if default == ""
    228        return indent + shiftwidth() * 2
    229      elseif default == '#FIXED'
    230        " We need to look for the fixed value.  If non exists, indent three
    231        " levels.
    232        let [default, end] = s:lex(line, end, '^"\_[^"]*"')
    233        if default == ""
    234          return indent + shiftwidth() * 3
    235        endif
    236      endif
    237    endwhile
    238  elseif declaration == 'ENTITY'
    239    " Check for entity name.  If none exists, indent one level.  Otherwise, if
    240    " the name actually turns out to be a percent sign, “%”, this is a
    241    " parameter entity.  Read another token to determine the entity name and,
    242    " again, if none exists, indent one level.
    243    let [name, end] = s:lex(line, end)
    244    if name == ""
    245      return indent + shiftwidth()
    246    elseif name == '%'
    247      let [name, end] = s:lex(line, end)
    248      if name == ""
    249        return indent + shiftwidth()
    250      endif
    251    endif
    252 
    253    " Now check for the entity value.  If none exists, indent one level.  If it
    254    " does exist, indent to same level as first line, as we’re now done with
    255    " this entity.
    256    "
    257    " The entity value can be a string in single or double quotes (no escapes
    258    " to worry about, as entities are used instead).  However, it can also be
    259    " that this is an external unparsed entity.  In that case we have to look
    260    " further for (possibly) a public ID and an URI followed by the NDATA
    261    " keyword and the actual notation name.  For the public ID and URI, indent
    262    " two levels, if they don’t exist.  If the NDATA keyword doesn’t exist,
    263    " indent one level.  Otherwise, if the actual notation name doesn’t exist,
    264    " indent two level.  If it does, indent to same level as first line, as
    265    " we’re now done with this entity.
    266    let [value, end] = s:lex(line, end)
    267    if value == ""
    268      return indent + shiftwidth()
    269    elseif value == 'SYSTEM' || value == 'PUBLIC'
    270      let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
    271      if quoted_string == ""
    272        return indent + shiftwidth() * 2
    273      endif
    274 
    275      if value == 'PUBLIC'
    276        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
    277        if quoted_string == ""
    278          return indent + shiftwidth() * 2
    279        endif
    280      endif
    281 
    282      let [ndata, end] = s:lex(line, end)
    283      if ndata == ""
    284        return indent + shiftwidth()
    285      endif
    286 
    287      let [name, end] = s:lex(line, end)
    288      return name == "" ? (indent + shiftwidth() * 2) : indent
    289    else
    290      return indent
    291    endif
    292  elseif declaration == 'NOTATION'
    293    " Check for notation name.  If none exists, indent one level.
    294    let [name, end] = s:lex(line, end)
    295    if name == ""
    296      return indent + shiftwidth()
    297    endif
    298 
    299    " Now check for the external ID.  If none exists, indent one level.
    300    let [id, end] = s:lex(line, end)
    301    if id == ""
    302      return indent + shiftwidth()
    303    elseif id == 'SYSTEM' || id == 'PUBLIC'
    304      let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
    305      if quoted_string == ""
    306        return indent + shiftwidth() * 2
    307      endif
    308 
    309      if id == 'PUBLIC'
    310        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\|>\)')
    311        if quoted_string == ""
    312          " TODO: Should use s:lex here on getline(v:lnum) and check for >.
    313          return getline(v:lnum) =~ '^\s*>' ? indent : (indent + shiftwidth() * 2)
    314        elseif quoted_string == '>'
    315          return indent
    316        endif
    317      endif
    318    endif
    319 
    320    return indent
    321  endif
    322 
    323  " TODO: Processing directives could be indented I suppose.  But perhaps it’s
    324  " just as well to let the user decide how to indent them (perhaps extending
    325  " this function to include proper support for whatever processing directive
    326  " language they want to use).
    327 
    328  " Conditional sections are simply passed along to let Vim decide what to do
    329  " (and hence the user).
    330  return -1
    331 endfunction
    332 
    333 let &cpo = s:cpo_save
    334 unlet s:cpo_save