dtd.vim (12072B)
" Vim indent file
" Language: DTD (Document Type Definition for XML)
" Maintainer: Doug Kearns <dougkearns@gmail.com>
" Previous Maintainer: Nikolai Weibull <now@bitwi.se>
" Last Change: 24 Sep 2021

" Only load this indent file when no other was loaded.
if exists("b:did_indent")
  finish
endif
let b:did_indent = 1

setlocal indentexpr=GetDTDIndent()
setlocal indentkeys=!^F,o,O,>
setlocal nosmartindent

let b:undo_indent = "setl inde< indk< si<"

" The functions below only need to be defined once per Vim session.
if exists("*GetDTDIndent")
  finish
endif

let s:cpo_save = &cpo
set cpo&vim

" Default token: a run of non-whitespace characters.
" TODO: Needs to be adjusted to stop at [, <, and ].
let s:token_pattern = '^[^[:space:]]\+'

" Read one token from a:input.
"
" Leading whitespace (including newlines) is skipped starting at byte offset
" a:start, then the pattern is matched from there.  The optional third
" argument overrides s:token_pattern.
"
" Returns [token, end], where end is the byte offset just past the match, or
" ["", a:start] when nothing matches.
function s:lex1(input, start, ...)
  let pattern = a:0 > 0 ? a:1 : s:token_pattern
  let start = matchend(a:input, '^\_s*', a:start)
  if start == -1
    return ["", a:start]
  endif
  let end = matchend(a:input, pattern, start)
  if end == -1
    return ["", a:start]
  endif
  let token = strpart(a:input, start, end - start)
  return [token, end]
endfunction

" Read one token from a:input like s:lex1(), but skip over comments that are
" delimited by a pair of "--" tokens.
"
" Whenever the token read is exactly "--", tokens are consumed up to and
" including the next "--" and lexing continues after it.  If the input ends
" before the closing "--" is found, the empty result of the failing s:lex1()
" call is returned.
function s:lex(input, start, ...)
  let pattern = a:0 > 0 ? a:1 : s:token_pattern
  let info = s:lex1(a:input, a:start, pattern)
  while info[0] == '--'
    let info = s:lex1(a:input, info[1], pattern)
    while info[0] != "" && info[0] != '--'
      let info = s:lex1(a:input, info[1], pattern)
    endwhile
    if info[0] == ""
      return info
    endif
    let info = s:lex1(a:input, info[1], pattern)
  endwhile
  return info
endfunction

" Scan a parenthesized group (content model or enumeration) in a:line.
"
" a:end is the byte offset just past an already consumed opening "(".
"
" Returns [-1, end] as soon as that outermost parenthesis is closed, with end
" just past the closing token.  If the tokens run out first, returns
" [column, end], where column is the position just after the innermost
" unclosed "(" counted from the start of the (joined) line it is on.
"
" NOTE: column is computed from byte offsets, not display cells, so it only
" equals the on-screen column when the text before the parenthesis contains
" no tabs or multi-byte characters.
function s:indent_to_innermost_parentheses(line, end)
  let token = '('
  let end = a:end
  " Stack of byte offsets of the currently open parentheses.
  let parentheses = [end - 1]
  while token != ""
    let [token, end] = s:lex(a:line, end, '^\%([(),|]\|[A-Za-z0-9_-]\+\|#P\=CDATA\|%[A-Za-z0-9_-]\+;\)[?*+]\=')
    if token[0] == '('
      call add(parentheses, end - 1)
    elseif token[0] == ')'
      if len(parentheses) == 1
        return [-1, end]
      endif
      call remove(parentheses, -1)
    endif
  endwhile
  " strridx() yields -1 when there is no preceding newline, which makes the
  " result "offset + 1" for a parenthesis on the first line.
  return [parentheses[-1] - strridx(a:line, "\n", parentheses[-1]), end]
endfunction

" 'indentexpr' function for DTD files.
"
" Searches backwards for the nearest "<!", lexes the declaration from there
" up to (but not including) line v:lnum and returns the indent for v:lnum.
" Returns -1 to keep the current indent for unterminated comments and for
" declarations this function does not know about.
"
" TODO: Line and end could be script global (think OO members).
function GetDTDIndent()
  if v:lnum == 1
    return 0
  endif

  " Begin by searching back for a <!.  From here, depending on what follows
  " immediately after, parse to where we're at to determine what to do.
  " NOTE: the search itself does not check whether the match is inside a
  " comment.
  if search('<!', 'bceW') == 0
    return indent(v:lnum - 1)
  endif
  let lnum = line('.')
  let col = col('.')
  let indent = indent('.')
  let line = lnum == v:lnum ? getline(lnum) : join(getline(lnum, v:lnum - 1), "\n")

  " The "e" flag leaves the cursor on the "!", so its 1-based column is the
  " 0-based byte offset of the text right after "<!".
  let [declaration, end] = s:lex1(line, col)
  if declaration == ""
    return indent + shiftwidth()
  elseif declaration == '--'
    " We're looking at a comment.  Now, simply determine if the comment is
    " terminated or not.  If it isn't, let Vim take care of that using
    " 'comments' and 'autoindent'.  Otherwise, indent to the first line's
    " level.
    while declaration != ""
      let [declaration, end] = s:lex(line, end)
      if declaration == "-->"
        return indent
      endif
    endwhile
    return -1
  elseif declaration == 'ELEMENT'
    " Check for element name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    endif

    " Check for token following element name.  This can be a specification of
    " whether the start or end tag may be omitted.  If nothing is found, indent
    " one level.
    let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)')
    let n = 0
    while token =~ '[-O]' && n < 2
      let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)')
      let n += 1
    endwhile
    if token == ""
      return indent + shiftwidth()
    endif

    " Next comes the content model.  If the token we've found isn't a
    " parenthesis it must be either ANY, EMPTY or some random junk.  Either
    " way, we're done indenting this element, so set it to that of the first
    " line so that the terminating ">" winds up having the same indentation.
    if token != '('
      return indent
    endif

    " Now go through the content model.  We need to keep track of the nesting
    " of parentheses.  As soon as we hit 0 we're done.  If that happens we must
    " have a complete content model.  Thus set indentation to be the same as
    " that of the first line so that the terminating ">" winds up having the
    " same indentation.  Otherwise, we'll indent to the innermost parentheses
    " not yet matched.
    let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
    if indent_of_innermost != -1
      return indent_of_innermost
    endif

    " Finally, look for any additions and/or exceptions to the content model.
    " This is defined by a "+" or "-" followed by another content model
    " declaration.
    " TODO: Can the "-" be separated by whitespace from the "("?
    let seen = { '+(': 0, '-(': 0 }
    while 1
      let [additions_exceptions, end] = s:lex(line, end, '^[+-](')
      if additions_exceptions != '+(' && additions_exceptions != '-('
        let [token, end] = s:lex(line, end)
        if token == '>'
          return indent
        endif
        " TODO: Should use s:lex here on getline(v:lnum) and check for >.
        return getline(v:lnum) =~ '^\s*>' || count(values(seen), 0) == 0 ? indent : (indent + shiftwidth())
      endif

      " If we've seen an addition or exception already and this is of the same
      " kind, the user is writing a broken DTD.  Time to bail.
      if seen[additions_exceptions]
        return indent
      endif
      let seen[additions_exceptions] = 1

      let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
      if indent_of_innermost != -1
        return indent_of_innermost
      endif
    endwhile
  elseif declaration == 'ATTLIST'
    " Check for element name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    endif

    " Check for any number of attributes.
    while 1
      " Check for attribute name.  If none exists, indent one level, unless the
      " current line is a lone ">", in which case we indent to the same level
      " as the first line.  Otherwise, if the attribute name is ">", we have
      " actually hit the end of the attribute list, in which case we indent to
      " the same level as the first line.
      let [name, end] = s:lex(line, end)
      if name == ""
        " TODO: Should use s:lex here on getline(v:lnum) and check for >.
        return getline(v:lnum) =~ '^\s*>' ? indent : (indent + shiftwidth())
      elseif name == ">"
        return indent
      endif

      " Check for attribute value declaration.  If none exists, indent two
      " levels.  Otherwise, if it's an enumerated value, check for nested
      " parentheses and indent to the innermost one if we don't reach the end
      " of the list.  Otherwise, just continue with looking for the default
      " attribute value.
      " TODO: Do validation of keywords
      " (CDATA|NMTOKEN|NMTOKENS|ID|IDREF|IDREFS|ENTITY|ENTITIES)?
      let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
      if value == ""
        return indent + shiftwidth() * 2
      elseif value == 'NOTATION'
        " If this is an enumerated value based on notations, read another
        " token for the actual value.  If it doesn't exist, indent three
        " levels.
        " TODO: If validating according to above, value must be equal to '('.
        let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
        if value == ""
          return indent + shiftwidth() * 3
        endif
      endif

      if value == '('
        let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
        if indent_of_innermost != -1
          return indent_of_innermost
        endif
      endif

      " Finally look for the attribute's default value.  If none exists,
      " indent two levels.
      let [default, end] = s:lex(line, end, '^\%("\_[^"]*"\|#\(REQUIRED\|IMPLIED\|FIXED\)\)')
      if default == ""
        return indent + shiftwidth() * 2
      elseif default == '#FIXED'
        " We need to look for the fixed value.  If none exists, indent three
        " levels.
        let [default, end] = s:lex(line, end, '^"\_[^"]*"')
        if default == ""
          return indent + shiftwidth() * 3
        endif
      endif
    endwhile
  elseif declaration == 'ENTITY'
    " Check for entity name.  If none exists, indent one level.  Otherwise, if
    " the name actually turns out to be a percent sign, "%", this is a
    " parameter entity.  Read another token to determine the entity name and,
    " again, if none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    elseif name == '%'
      let [name, end] = s:lex(line, end)
      if name == ""
        return indent + shiftwidth()
      endif
    endif

    " Now check for the entity value.  If none exists, indent one level.  If it
    " does exist, indent to same level as first line, as we're now done with
    " this entity.
    "
    " The entity value can be a string in single or double quotes (no escapes
    " to worry about, as entities are used instead).  However, it can also be
    " that this is an external entity.  In that case we have to look further
    " for (possibly) a public ID and a URI, optionally followed by the NDATA
    " keyword and the actual notation name (external unparsed entity).  For
    " the public ID and URI, indent two levels, if they don't exist.  If
    " nothing at all follows them, indent one level.  If the declaration is
    " closed right after them, indent to same level as first line.
    " Otherwise, if the actual notation name doesn't exist, indent two
    " levels.  If it does, indent to same level as first line, as we're now
    " done with this entity.
    let [value, end] = s:lex(line, end)
    if value == ""
      return indent + shiftwidth()
    elseif value == 'SYSTEM' || value == 'PUBLIC'
      let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
      if quoted_string == ""
        return indent + shiftwidth() * 2
      endif

      if value == 'PUBLIC'
        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
        if quoted_string == ""
          return indent + shiftwidth() * 2
        endif
      endif

      let [ndata, end] = s:lex(line, end)
      if ndata == ""
        return indent + shiftwidth()
      elseif ndata[0] == '>'
        " The declaration was closed directly after the external ID, i.e.
        " this is an external parsed entity without an NDATA part.  Without
        " this check the ">" would be taken for the NDATA keyword and the
        " missing notation name would indent the following line two levels.
        return indent
      endif

      let [name, end] = s:lex(line, end)
      return name == "" ? (indent + shiftwidth() * 2) : indent
    else
      return indent
    endif
  elseif declaration == 'NOTATION'
    " Check for notation name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    endif

    " Now check for the external ID.  If none exists, indent one level.
    let [id, end] = s:lex(line, end)
    if id == ""
      return indent + shiftwidth()
    elseif id == 'SYSTEM' || id == 'PUBLIC'
      let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
      if quoted_string == ""
        return indent + shiftwidth() * 2
      endif

      if id == 'PUBLIC'
        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\|>\)')
        if quoted_string == ""
          " TODO: Should use s:lex here on getline(v:lnum) and check for >.
          return getline(v:lnum) =~ '^\s*>' ? indent : (indent + shiftwidth() * 2)
        elseif quoted_string == '>'
          return indent
        endif
      endif
    endif

    return indent
  endif

  " TODO: Processing directives could be indented I suppose.  But perhaps it's
  " just as well to let the user decide how to indent them (perhaps extending
  " this function to include proper support for whatever processing directive
  " language they want to use).

  " Conditional sections are simply passed along to let Vim decide what to do
  " (and hence the user).
  return -1
endfunction

let &cpo = s:cpo_save
unlet s:cpo_save