tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

html.html (234774B)


      1 <!-- EDITOR NOTES  -*- mode: Text; fill-column: 100 -*-
      2 !
      3 !   Adding a new element involves editing the following sections:
      4 !    - section for the element itself
      5 !    - descriptions of the element's categories
      6 !    - images/content-venn.svg
      7 !    - syntax, if it's void or otherwise special
      8 !    - parser, if it's not phrasing-level
      9 !    - rendering
     10 !    - obsolete section
     11 !    - element, attribute, content model, and interface indexes
     12 !    - adding it to the section with ARIA mappings
     13 !
     14 !-->
     15 
     16 <!--
     17 ! http://lists.w3.org/Archives/Public/www-archive/2014Apr/0034.html
     18 !-->
     19 
     20 <!--START complete--><!--START dev-html-->
     21 <!DOCTYPE html>
     22 <!--SET FINGERPRINT=<span title="fingerprinting vector" class="fingerprint"><img src="images/fingerprint.png" alt="(This is a fingerprinting vector.)" width=46 height=64></span>-->
     23 <html lang="en-GB-x-hixie" class="big">
     24 <head>
     25  <title>HTML Standard</title>
     26  <script>
     27   var loadTimer = new Date();
     28   var current_revision = "r" + "$Revision: 1 $".substr(11);
     29   current_revision = current_revision.substr(0, current_revision.length - 2);
     30   var last_known_revision = current_revision;
     31   function F( /* varargs... */) {
     32     var fragment = document.createDocumentFragment();
     33     for (var index = 0; index < arguments.length; index += 1) {
     34       if (arguments[index] instanceof Array) {
     35         fragment.appendChild(F.apply(this, arguments[index]));
     36       } else if (typeof arguments[index] == 'string') {
     37         fragment.appendChild(document.createTextNode(arguments[index]));
     38       } else {
     39         fragment.appendChild(arguments[index]);
     40       }
     41     }
     42     return fragment;
     43   }
     44   function E(name, /* optional */ attributes /*, varargs... */) {
     45     var element = document.createElement(name);
     46     var index = 1;
     47     if ((arguments.length > 1) && (typeof attributes != 'string') &&
     48         (!(attributes instanceof Node)) && (!(attributes instanceof Array))) {
     49       for (var attName in attributes) {
     50         if (typeof attributes[attName] == 'boolean') {
     51           if (attributes[attName])
     52             element.setAttribute(attName, '');
     53         } else if (typeof attributes[attName] == 'function') {
     54           element[attName] = attributes[attName];
     55         } else {
     56           element.setAttribute(attName, attributes[attName]);
     57         }
     58       }
     59       index = 2;
     60     }
     61     for (; index < arguments.length; index += 1) {
     62       if (arguments[index] instanceof Array) {
     63         element.appendChild(F.apply(this, arguments[index]));
     64       } else if (typeof arguments[index] == 'string') {
     65         element.appendChild(document.createTextNode(arguments[index]));
     66       } else {
     67         element.appendChild(arguments[index]);
     68       }
     69     }
     70     return element;
     71   }
     72   function getCookie(name) {
     73     var params = location.search.substr(1).split("&");
     74     for (var index = 0; index < params.length; index++) {
     75       if (params[index] == name)
     76         return "1";
     77       var data = params[index].split("=");
     78       if (data[0] == name)
     79         return unescape(data[1]);
     80     }
     81     var cookies = document.cookie.split("; ");
     82     for (var index = 0; index < cookies.length; index++) {
     83       var data = cookies[index].split("=");
     84       if (data[0] == name)
     85         return unescape(data[1]);
     86     }
     87     return null;
     88   }
     89   var currentAlert;
     90   var currentAlertTimeout;
     91   function showAlert(s, href) {
     92     if (!currentAlert) {
     93       currentAlert = document.createElement('div');
     94       currentAlert.id = 'alert';
     95       var x = document.createElement('button');
     96       x.textContent = '\u2573';
     97       x.onclick = closeAlert2;
     98       currentAlert.appendChild(x);
     99       currentAlert.appendChild(document.createElement('span'));
    100       currentAlert.onmousemove = function () {
    101         clearTimeout(currentAlertTimeout);
    102         currentAlert.className = '';
    103         currentAlertTimeout = setTimeout(closeAlert, 10000);
    104       }
    105       document.body.appendChild(currentAlert);
    106     } else {
    107       clearTimeout(currentAlertTimeout);
    108       currentAlert.className = '';
    109     }
    110     currentAlert.lastChild.textContent = '';
    111     currentAlert.lastChild.appendChild(F(s));
    112     if (href) {
    113       var link = document.createElement('a');
    114       link.href = href;
    115       link.textContent = href;
    116       currentAlert.lastChild.appendChild(F(' ', link));
    117     }
    118     currentAlertTimeout = setTimeout(closeAlert, 10000);
    119   }
    120   function closeAlert() {
    121     clearTimeout(currentAlertTimeout);
    122     if (currentAlert) {
    123       currentAlert.className = 'closed';
    124       currentAlertTimeout = setTimeout(closeAlert2, 3000);
    125     }
    126   }
    127   function closeAlert2() {
    128     clearTimeout(currentAlertTimeout);
    129     if (currentAlert) {
    130       currentAlert.parentNode.removeChild(currentAlert);
    131       currentAlert = null;
    132     }
    133   }
    134   window.addEventListener('keydown', function (event) {
    135     if (event.keyCode == 27) {
    136       if (currentAlert)
    137         closeAlert2();
    138     } else {
    139       closeAlert();
    140     }
    141   }, false);
    142   window.addEventListener('scroll', function (event) {
    143     closeAlert();
    144   }, false);
    145   function load(script) {
    146     var e = document.createElement('script');
    147     e.setAttribute('src', '//www.whatwg.org/specs/web-apps/current-work/' + script);
    148     document.body.appendChild(e);
    149   }
    150 
    151   var startedInit = 0;
    152   function init() {
    153     startedInit = 1;
    154     if (location.search == '?slow-browser')
    155       return;
    156     load('reviewer.js');
    157     if (document.documentElement.className == "big" || document.documentElement.className == "big split index")
    158       load('toc.js');
    159     load('updater.js');
    160     load('dfn.js');
    161     load('status.js');
    162     if (getCookie('profile') == '1')
    163       document.getElementsByTagName('h2')[0].textContent += '; load: ' + (new Date() - loadTimer) + 'ms';
    164   }
    165   if (document.documentElement.className == "")
    166     setTimeout(function () {
    167       if (!startedInit)
    168         showAlert("Too slow? Try reading the multipage copy of the spec instead:", "http://whatwg.org/html");
    169     }, 6000);
    170 
    171   window.addEventListener('keypress', function (event) {
    172     if ((event.which == 114) && (event.metaKey)) {
    173       if (!confirm('Are you sure you want to reload this page?'))
    174         event.preventDefault();
    175     }
    176   }, false);
    177 
    178  </script>
    179  <link rel="stylesheet" href="//www.whatwg.org/style/specification">
    180  <link rel="icon" href="//www.whatwg.org/images/icon">
    181  <style>
    182   .proposal { border: blue solid; padding: 1em; }
    183   .bad, .bad *:not(.XXX) { color: gray; border-color: gray; background: transparent; }
    184   #updatesStatus { display: none; z-index: 10; }
    185   #updatesStatus.relevant { display: block; position: fixed; right: 1em; top: 1em; padding: 0.5em; font: bold small sans-serif; min-width: 25em; width: 30%; max-width: 40em; height: auto; border: ridge 4px gray; background: #EEEEEE; color: black; }
    186   div.head .logo { width: 11em; margin-bottom: 20em; }
    187 
    188   #configUI { position: absolute; z-index: 20; top: auto; right: 0; width: 11em; padding: 0 0.5em 0 0.5em; font-size: small; background: gray; background: rgba(32,32,32,0.9); color: white; border-radius: 1em 0 0 1em; -moz-border-radius: 1em 0 0 1em; }
    189   #configUI p { margin: 0.75em 0; padding: 0.3em; }
    190   #configUI p label { display: block; }
    191   #configUI #updateUI, #configUI .loginUI { text-align: center; }
    192   #configUI input[type=button] { display: block; margin: auto; }
    193   #configUI :link, #configUI :visited { color: white; }
    194   #configUI :link:hover, #configUI :visited:hover { background: transparent; }
    195 
    196   #alert { position: fixed; top: 20%; left: 20%; right: 20%; font-size: 2em; padding: 0.5em; z-index: 40; background: gray; background: rgba(32,32,32,0.9); color: white; -moz-border-radius: 1em; border-radius: 1em; /* prefixed forms first, standard property last */ -webkit-transition: opacity 1s linear; transition: opacity 1s linear; }
    197   #alert.closed { opacity: 0; }
    198   #alert button { position: absolute; top: -1em; right: 2em; border-radius: 1em 1em 0 0; border: none; line-height: 0.9; color: white; background: rgb(64,64,64); font-size: 0.6em; font-weight: 900; cursor: pointer; }
    199   #alert :link, #alert :visited { color: white; }
    200   #alert :link:hover, #alert :visited:hover { background: transparent; }
    201   @media print { #configUI { display: none; } }
    202 
    203   .rfc2119 { font-variant: small-caps; text-shadow: 0 0 0.5em yellow; position: static; }
    204   .rfc2119::after { position: absolute; left: 0; width: 25px; text-align: center; color: yellow; text-shadow: 0.075em 0.075em 0.2em black; }
    205   .rfc2119.m\ust::after { content: '\2605'; }
    206   .rfc2119.s\hould::after { content: '\2606'; }
    207   [hidden] { display: none; }
    208 
    209   .fingerprint { float: right; }
    210 
    211   .applies thead th > * { display: block; }
    212   .applies thead code { display: block; }
    213   .applies td { text-align: center; }
    214   .applies .yes { background: yellow; }
    215 
    216   .matrix, .matrix td { border: hidden; text-align: right; }
    217   .matrix { margin-left: 2em; }
    218 
    219   .vertical-summary-table tr > th[rowspan="2"]:first-child + th,
    220   .vertical-summary-table tr > td[rowspan="2"]:first-child + td { border-bottom: hidden; }
    221 
    222   .dice-example { border-collapse: collapse; border-style: hidden solid solid hidden; border-width: thin; margin-left: 3em; }
    223   .dice-example caption { width: 30em; font-size: smaller; font-style: italic; padding: 0.75em 0; text-align: left; }
    224   .dice-example td, .dice-example th { border: solid thin; width: 1.35em; height: 1.05em; text-align: center; padding: 0; }
    225 
    226   td.eg { border-width: thin; text-align: center; }
    227 
    228   #table-example-1 { border: solid thin; border-collapse: collapse; margin-left: 3em; }
    229   #table-example-1 * { font-family: "Essays1743", serif; line-height: 1.01em; }
    230   #table-example-1 caption { padding-bottom: 0.5em; }
    231   #table-example-1 thead, #table-example-1 tbody { border: none; }
    232   #table-example-1 th, #table-example-1 td { border: solid thin; }
    233   #table-example-1 th { font-weight: normal; }
    234   #table-example-1 td { border-style: none solid; vertical-align: top; }
    235   #table-example-1 th { padding: 0.5em; vertical-align: middle; text-align: center; }
    236   #table-example-1 tbody tr:first-child td { padding-top: 0.5em; }
    237   #table-example-1 tbody tr:last-child td { padding-bottom: 1.5em; }
    238   #table-example-1 tbody td:first-child { padding-left: 2.5em; padding-right: 0; width: 9em; }
    239   #table-example-1 tbody td:first-child::after { content: leader(". "); }
    240   #table-example-1 tbody td { padding-left: 2em; padding-right: 2em; }
    241   #table-example-1 tbody td:first-child + td { width: 10em; }
    242   #table-example-1 tbody td:first-child + td ~ td { width: 2.5em; }
    243   #table-example-1 tbody td:first-child + td + td + td ~ td { width: 1.25em; }
    244 
    245   .apple-table-examples { border: none; border-collapse: separate; border-spacing: 1.5em 0em; width: 40em; margin-left: 3em; }
    246   .apple-table-examples * { font-family: "Times", serif; }
    247   .apple-table-examples td, .apple-table-examples th { border: none; white-space: nowrap; padding-top: 0; padding-bottom: 0; }
    248   .apple-table-examples tbody th:first-child { border-left: none; width: 100%; }
    249   .apple-table-examples thead th:first-child ~ th { font-size: smaller; font-weight: bolder; border-bottom: solid 2px; text-align: center; }
    250   .apple-table-examples tbody th::after, .apple-table-examples tfoot th::after { content: leader(". ") }
    251   .apple-table-examples tbody th, .apple-table-examples tfoot th { font: inherit; text-align: left; }
    252   .apple-table-examples td { text-align: right; vertical-align: top; }
    253   .apple-table-examples.e1 tbody tr:last-child td { border-bottom: solid 1px; }
    254   .apple-table-examples.e1 tbody + tbody tr:last-child td { border-bottom: double 3px; }
    255   .apple-table-examples.e2 th[scope=row] { padding-left: 1em; }
    256   .apple-table-examples sup { line-height: 0; }
    257 
    258   .three-column-nowrap tr > td:first-child,
    259   .three-column-nowrap tr > td:first-child + td,
    260   .three-column-nowrap tr > td:first-child + td + td { white-space: nowrap; }
    261 
    262   .details-example img { vertical-align: top; }
    263 
    264   #base64-table {
    265     white-space: nowrap;
    266     font-size: 0.6em;
    267     column-width: 6em;
    268     column-count: 5;
    269     column-gap: 1em;
    270     -moz-column-width: 6em;
    271     -moz-column-count: 5;
    272     -moz-column-gap: 1em;
    273     -webkit-column-width: 6em;
    274     -webkit-column-count: 5;
    275     -webkit-column-gap: 1em;
    276   }
    277   #base64-table thead { display: none; }
    278   #base64-table * { border: none; }
    279   #base64-table tbody td:first-child:after { content: ':'; }
    280   #base64-table tbody td:last-child { text-align: right; }
    281 
    282   #named-character-references-table {
    283     white-space: nowrap;
    284     font-size: 0.6em;
    285     column-width: 30em;
    286     column-gap: 1em;
    287     -moz-column-width: 30em;
    288     -moz-column-gap: 1em;
    289     -webkit-column-width: 30em;
    290     -webkit-column-gap: 1em;
    291   }
    292   #named-character-references-table > table > tbody > tr > td:first-child + td,
    293   #named-character-references-table > table > tbody > tr > td:last-child { text-align: center; }
    294   #named-character-references-table > table > tbody > tr > td:last-child:hover > span { position: absolute; top: auto; left: auto; margin-left: 0.5em; line-height: 1.2; font-size: 5em; border: outset; padding: 0.25em 0.5em; background: white; width: 1.25em; height: auto; text-align: center; }
    295   #named-character-references-table > table > tbody > tr#entity-CounterClockwiseContourIntegral > td:first-child { font-size: 0.5em; }
    296 
    297   .glyph.control { color: red; }
    298 
    299   @font-face {
    300     font-family: 'Essays1743';
    301     src: url('//www.whatwg.org/specs/web-apps/current-work/fonts/Essays1743.ttf');
    302   }
    303   @font-face {
    304     font-family: 'Essays1743';
    305     font-weight: bold;
    306     src: url('//www.whatwg.org/specs/web-apps/current-work/fonts/Essays1743-Bold.ttf');
    307   }
    308   @font-face {
    309     font-family: 'Essays1743';
    310     font-style: italic;
    311     src: url('//www.whatwg.org/specs/web-apps/current-work/fonts/Essays1743-Italic.ttf');
    312   }
    313   @font-face {
    314     font-family: 'Essays1743';
    315     font-style: italic;
    316     font-weight: bold;
    317     src: url('//www.whatwg.org/specs/web-apps/current-work/fonts/Essays1743-BoldItalic.ttf');
    318   }
    319  </style>
    320  <link rel="stylesheet" href="status.css">
    321 </head>
    322 <body onload="init()">
    323  <header class="head with-buttons" id="head">
    324   <p><a href="//www.whatwg.org/" class="logo"><img width="101" height="101" alt="WHATWG" src="/images/logo"></a></p>
    325   <hgroup>
    326    <h1 class="allcaps">HTML</h1>
    327    <h2 class="no-num no-toc">Living Standard &mdash; Last Updated <span class="pubdate">[DATE: 01 Jan 1901]</span></h2>
    328   </hgroup>
    329   <div>
    330    <div>
    331     <a href="//whatwg.org/html"><span><strong>Multipage Version</strong> <code>whatwg.org/html</code></span></a>
    332     <a href="//whatwg.org/c"><span><strong>One-Page Version</strong> <code>whatwg.org/c</code></span></a>
    333     <a href="//whatwg.org/pdf"><span><strong>PDF Version</strong> <code>whatwg.org/pdf</code></span></a>
    334     <a href="http://developers.whatwg.org/"><span><strong>Developer Version</strong> <code>developers.whatwg.org</code></span></a>
    335    </div>
    336    <div>
    337     <a class="misc" href="//whatwg.org/faq"><span><strong>FAQ</strong> <code>whatwg.org/faq</code></span></a>
    338     <a class="misc" href="http://validator.whatwg.org/"><span><strong>Validators</strong> <code>validator.whatwg.org</code></span></a>
    339    </div>
    340    <div>
    341     <a class="comms" href="//www.whatwg.org/mailing-list"><span><strong>Join our Mailing List</strong> <code>whatwg@whatwg.org</code></span></a>
    342     <a class="comms" href="http://wiki.whatwg.org/wiki/IRC"><span><strong>Join us on IRC</strong> <code>#whatwg on Freenode</code></span></a>
    343     <a class="comms" href="http://forums.whatwg.org/"><span><strong>Join our Forums</strong> <code>forums.whatwg.org</code></span></a>
    344    </div>
    345    <div>
    346     <!--<a class="changes" href="http://svn.whatwg.org/webapps"><span><strong>SVN Repository</strong> <code>svn.whatwg.org/webapps</code></span></a>-->
    347     <a class="changes" href="http://html5.org/tools/web-apps-tracker"><span><strong>Change Log</strong> <code>html5.org's tracker</code></span></a>
    348     <a class="changes" href="http://twitter.com/WHATWG"><span><strong>Twitter Updates</strong> <code>@WHATWG</code></span></a>
    349    </div>
    350    <div>
    351     <a class="feedback" href="https://www.w3.org/Bugs/Public/buglist.cgi?bug_status=UNCONFIRMED&amp;bug_status=NEW&amp;bug_status=ASSIGNED&amp;bug_status=REOPENED&amp;component=HTML&amp;product=WHATWG"><span><strong>View Open Bugs</strong> <code>filed in Bugzilla</code></span></a>
    352     <a class="feedback" href="//www.whatwg.org/newbug"><span><strong>File a Bug</strong> <code>whatwg.org/newbug</code></span></a>
    353     <a class="feedback" href="http://ian.hixie.ch/+"><span><strong>E-mail the Editor</strong> <code>ian@hixie.ch</code></span></a>
    354    </div>
    355   </div>
    356  </header>
    357 
    358  <hr>
    359 
    360  <div id="configUI"></div>
    361 
    362  <h2 class="no-num no-toc" id="contents">Table of contents</h2>
    363  <!--toc-->
    364 
    365  <hr>
    366 
    367 <!--
    368  <pre class="idl">
    369   interface Screen { }; // CSSOM
    370   interface URL { }; // URL API
    371   interface Blob { }; // File API
    372   interface File : Blob { }; // File API
    373   interface FileList { }; // File API
    374   interface WebGLRenderingContext { }; // WebGL
    375   interface XMLDocument { }; // DOM
    376   interface HTMLCollection { }; // DOM
    377   interface DOMTokenList { }; // DOM
    378   interface DOMSettableTokenList { attribute any value; }; // DOM
    379   interface SVGMatrix { }; // SVG
    380   // fake interfaces that map to JS object types:
    381   interface ArrayBuffer { };
    382   interface Int8Array { };
    383   interface Uint8Array { };
    384   interface Uint8ClampedArray { };
    385   interface Int16Array { };
    386   interface Uint16Array { };
    387   interface Int32Array { };
    388   interface Uint32Array { };
    389   interface Float32Array { };
    390   interface Float64Array { };
    391   interface Uint8ClampedArray { };
    392  </pre>
    393 -->
    394 
    395  <h2 id="introduction">Introduction</h2>
    396 
    397  <div class="nodev">
    398 
    399  <h3 id="abstract">Where does this specification fit?</h3>
    400 
    401  <p>This specification defines a big part of the Web platform, in lots of detail. Its place in the
    402  Web platform specification stack relative to other specifications can be best summed up as
    403  follows:</p>
    404 
    405  <p><img src="images/abstract.png" width="398" height="359" alt="It consists of everything else, above such core technologies as HTTP, URI/IRIs, DOM, XML, Unicode, and ECMAScript; below presentation-layer technologies like CSS and the NPAPI; and to the side of technologies like Geolocation, SVG, MathML, and XHR."></p>
    406 
    407  </div>
    408 
    409 
    410  <h3 id="is-this-html5?">Is this HTML5?</h3>
    411 
    412  <!-- NON-NORMATIVE SECTION -->
    413 
    414  <p>In short: Yes.</p>
    415 
    416  <p>At greater length: The term "HTML5" is widely used as a buzzword to refer to modern Web
    417  technologies, many of which (though by no means all) are developed at the WHATWG. This document is
    418  one such; others are available from <a href="http://www.whatwg.org/specs/">the WHATWG
    419  specification index</a>.</p>
    420 
    421  <p class="note">Although we have asked them to stop doing so, the W3C also republishes some parts
    422  of this specification as separate documents. There are numerous differences between this
    423  specification and the W3C forks; some minor, some major. Unfortunately these are not currently
    424  accurately documented anywhere, so there is no way to know which are intentional and which are
    425  not.</p>
    426 
    427 
    428  <h3>Background</h3>
    429 
    430  <!-- NON-NORMATIVE SECTION -->
    431 
    432  <p>HTML is the World Wide Web's core markup language. Originally, HTML was primarily designed as a
    433  language for semantically describing scientific documents. Its general design, however, has
    434  enabled it to be adapted, over the subsequent years, to describe a number of other types of
    435  documents and even applications.</p>
    436 
    437 
    438  <h3>Audience</h3>
    439 
    440  <!-- NON-NORMATIVE SECTION -->
    441 
    442  <p>This specification is intended for authors of documents and scripts that use the features
    443  defined in this specification<span class="nodev">, implementors of tools that operate on pages that
    444  use the features defined in this specification, and individuals wishing to establish the
    445  correctness of documents or implementations with respect to the requirements of this
    446  specification</span>.</p>
    447 
    448  <p>This document is probably not suited to readers who do not already have at least a passing
    449  familiarity with Web technologies, as in places it sacrifices clarity for precision, and brevity
    450  for completeness. More approachable tutorials and authoring guides can provide a gentler
    451  introduction to the topic.</p>
    452 
    453  <p>In particular, familiarity with the basics of DOM is necessary for a complete understanding of
    454  some of the more technical parts of this specification. An understanding of Web IDL, HTTP, XML,
    455  Unicode, character encodings, JavaScript, and CSS will also be helpful in places but is not
    456  essential.</p>
    457 
    458 
    459  <h3>Scope</h3>
    460 
    461  <!-- NON-NORMATIVE SECTION -->
    462 
    463  <p>This specification is limited to providing a semantic-level markup language and associated
    464  semantic-level scripting APIs for authoring accessible pages on the Web ranging from static
    465  documents to dynamic applications.</p>
    466 
    467  <p>The scope of this specification does not include providing mechanisms for media-specific
    468  customization of presentation (although default rendering rules for Web browsers are included at
    469  the end of this specification, and several mechanisms for hooking into CSS are provided as part of
    470  the language).</p>
    471 
    472  <p>The scope of this specification is not to describe an entire operating system. In particular,
    473  hardware configuration software, image manipulation tools, and applications that users would be
    474  expected to use with high-end workstations on a daily basis are out of scope. In terms of
    475  applications, this specification is targeted specifically at applications that would be expected
    476  to be used by users on an occasional basis, or regularly but from disparate locations, with low
    477  CPU requirements. Examples of such applications include online purchasing systems, searching
    478  systems, games (especially multiplayer online games), public telephone books or address books,
    479  communications software (e-mail clients, instant messaging clients, discussion software), document
    480  editing software, etc.</p>
    481 
    482 
    483  <h3>History</h3>
    484 
    485  <!-- NON-NORMATIVE SECTION -->
    486 
    487  <p>For its first five years (1990-1995), HTML went through a number of revisions and experienced a
    488  number of extensions, primarily hosted first at CERN, and then at the IETF.</p>
    489 
    490  <p>With the creation of the W3C, HTML's development changed venue again. A first abortive attempt
    491  at extending HTML in 1995 known as HTML 3.0 then made way for a more pragmatic approach known as
    492  HTML 3.2, which was completed in 1997. HTML4 quickly followed later that same year.</p>
    493 
    494  <p>The following year, the W3C membership decided to stop evolving HTML and instead begin work on
    495  an XML-based equivalent, called XHTML. <!-- http://www.w3.org/MarkUp/future/#summary --> This
    496  effort started with a reformulation of HTML4 in XML, known as XHTML 1.0, which added no new
    497  features except the new serialisation, and which was completed in 2000. After XHTML 1.0, the W3C's
    498  focus turned to making it easier for other working groups to extend XHTML, under the banner of
    499  XHTML Modularization. In parallel with this, the W3C also worked on a new language that was not
    500  compatible with the earlier HTML and XHTML languages, calling it XHTML2.</p>
    501 
    502  <p>Around the time that HTML's evolution was stopped in 1998, parts of the API for HTML developed
    503  by browser vendors were specified and published under the name DOM Level 1 (in 1998) and DOM Level
    504  2 Core and DOM Level 2 HTML (starting in 2000 and culminating in 2003). These efforts then petered
    505  out, with some DOM Level 3 specifications published in 2004 but the working group being closed
    506  before all the Level 3 drafts were completed.</p>
    507 
    508  <p>In 2003, the publication of XForms, a technology which was positioned as the next generation of
    509  Web forms, sparked a renewed interest in evolving HTML itself, rather than finding replacements
    510  for it. This interest was born of the realization that XML's deployment as a Web technology was
    511  limited to entirely new technologies (like RSS and later Atom), rather than as a replacement for
    512  existing deployed technologies (like HTML).</p>
    513 
    514  <p>A proof of concept to show that it was possible to extend HTML4's forms to provide many of the
    515  features that XForms 1.0 introduced, without requiring browsers to implement rendering engines
    516  that were incompatible with existing HTML Web pages, was the first result of this renewed
    517  interest. At this early stage, while the draft was already publicly available, and input was
    518  already being solicited from all sources, the specification was only under Opera Software's
    519  copyright.</p>
    520 
    521  <p>The idea that HTML's evolution should be reopened was tested at a W3C workshop in 2004, where
    522  some of the principles that underlie the HTML5 work (described below), as well as the
    523  aforementioned early draft proposal covering just forms-related features, were presented to the
    524  W3C jointly by Mozilla and Opera. The proposal was rejected on the grounds that it
    525  conflicted with the previously chosen direction for the Web's evolution; the W3C staff and
    526  membership voted to continue developing XML-based replacements instead.</p>
    527 
    528  <p>Shortly thereafter, Apple, Mozilla, and Opera jointly announced their intent to continue
    529  working on the effort under the umbrella of a new venue called the WHATWG. A public mailing list
    530  was created, and the draft was moved to the WHATWG site. The copyright was subsequently amended to
    531  be jointly owned by all three vendors, and to allow reuse of the specification.</p>
    532 
    533  <p>The WHATWG was based on several core principles, in particular that technologies need to be
    534  backwards compatible, that specifications and implementations need to match even if this means
    535  changing the specification rather than the implementations, and that specifications need to be
    536  detailed enough that implementations can achieve complete interoperability without
    537  reverse-engineering each other.</p>
    538 
    539  <p>The last of these principles in particular required that the scope of the HTML5 specification include
    540  what had previously been specified in three separate documents: HTML4, XHTML1, and DOM2 HTML. It
    541  also meant including significantly more detail than had previously been considered the norm.</p>
    542 
    543  <p>In 2006, the W3C indicated an interest in participating in the development of HTML5 after all,
    544  and in 2007 formed a working group chartered to work with the WHATWG on the development of the
    545  HTML5 specification. Apple, Mozilla, and Opera allowed the W3C to publish the specification under
    546  the W3C copyright, while keeping a version with the less restrictive license on the WHATWG
    547  site.</p>
    548 
    549  <p>For a number of years, both groups then worked together. In 2011, however, the groups came to
    550  the conclusion that they had different goals: the W3C wanted to publish a "finished" version of
    551  "HTML5", while the WHATWG wanted to continue working on a Living Standard for HTML, continuously
    552  maintaining the specification rather than freezing it in a state with known problems, and adding
    553  new features as needed to evolve the platform.</p>
    554 
    555  <p>Since then, the WHATWG has been working on this specification (amongst others), and the W3C has
    556  been copying fixes made by the WHATWG into their fork of the document, as well as making other
    557  changes, some intentional and some not, with no documentation listing or explaining the
    558  differences.</p>
    559 
    560 
    561 
    562  <h3>Design notes</h3>
    563 
    564  <!-- NON-NORMATIVE SECTION -->
    565 
    566  <p>It must be admitted that many aspects of HTML appear at first glance to be nonsensical and
    567  inconsistent.</p>
    568 
    569  <p>HTML, its supporting DOM APIs, as well as many of its supporting technologies, have been
    570  developed over a period of several decades by a wide array of people with different priorities
    571  who, in many cases, did not know of each other's existence.</p>
    572 
    573  <p>Features have thus arisen from many sources, and have not always been designed in especially
    574  consistent ways. Furthermore, because of the unique characteristics of the Web, implementation
    575  bugs have often become de-facto, and now de-jure, standards, as content is often unintentionally
    576  written in ways that rely on them before they can be fixed.</p>
    577 
    578  <p>Despite all this, efforts have been made to adhere to certain design goals. These are described
    579  in the next few subsections.</p>
    580 
    581 
    582 
    583  <h4>Serializability of script execution</h4>
    584 
    585  <!-- NON-NORMATIVE SECTION -->
    586 
    587  <p>To avoid exposing Web authors to the complexities of multithreading, the HTML and DOM APIs are
    588  designed such that no script can ever detect the simultaneous execution of other scripts. Even
    589  with <span data-x="Worker">workers</span>, the intent is that the behavior of implementations can
    590  be thought of as completely serializing the execution of all scripts in all <span data-x="browsing
    591  context">browsing contexts</span>.</p>
    592 
    593  <p class="note">The <code
    594  data-x="dom-navigator-yieldForStorageUpdates">navigator.yieldForStorageUpdates()</code> method, in
    595  this model, is equivalent to allowing other scripts to run while the calling script is
    596  blocked.</p>
    597 
    598 
    599 
    600  <h4>Compliance with other specifications</h4>
    601 
    602  <!-- NON-NORMATIVE SECTION -->
    603 
    604  <p>This specification interacts with and relies on a wide variety of other specifications. In
    605  certain circumstances, unfortunately, conflicting needs have led to this specification violating
    606  the requirements of these other specifications. Whenever this has occurred, the transgressions
    607  have each been noted as a "<dfn>willful violation</dfn>", and the reason for the violation has
    608  been noted.</p>
    609 
    610 
    611 
    612  <h4>Extensibility</h4>
    613 
    614  <!-- NON-NORMATIVE SECTION -->
    615 
    616  <p>HTML has a wide array of extensibility mechanisms that can be used for adding semantics in a
    617  safe manner:</p>
    618 
    619  <ul>
    620 
    621   <li><p>Authors can use the <code data-x="attr-class">class</code> attribute to extend elements,
    622   effectively creating their own elements, while using the most applicable existing "real" HTML
    623   element, so that browsers and other tools that don't know of the extension can still support it
    624   somewhat well. This is the tack used by microformats, for example.</p></li>
    625 
    626   <li><p>Authors can include data for inline client-side scripts or server-side site-wide scripts
    627   to process using the <code data-x="attr-data-*">data-*=""</code> attributes. These are guaranteed
    628   to never be touched by browsers, and allow scripts to include data on HTML elements that scripts
    629   can then look for and process.</p></li>
    630 
    631   <li><p>Authors can use the <code data-x="meta">&lt;meta name="" content=""></code> mechanism to
    632   include page-wide metadata by registering <span data-x="concept-meta-extensions">extensions to
    633   the predefined set of metadata names</span>.</p></li>
    634 
    635   <li><p>Authors can use the <code data-x="attr-hyperlink-rel">rel=""</code> mechanism to annotate
    636   links with specific meanings by registering <span data-x="concept-rel-extensions">extensions to
    637   the predefined set of link types</span>. This is also used by microformats.</p></li>
    638 
    639   <li><p>Authors can embed raw data using the <code data-x="script">&lt;script type=""></code>
    640   mechanism with a custom type, for further handling by inline or server-side scripts.</p></li>
    641 
    642   <li><p>Authors can create <span data-x="plugin">plugins</span> and invoke them using the
    643   <code>embed</code> element. This is how Flash works.</p></li>
    644 
    645   <li><p>Authors can extend APIs using the JavaScript prototyping mechanism. This is widely used by
    646   script libraries, for instance.</p></li>
    647 
    648   <li><p>Authors can use the microdata feature (the <code
    649   data-x="attr-itemscope">itemscope=""</code> and <code data-x="attr-itemprop">itemprop=""</code>
    650   attributes) to embed nested name-value pairs of data to be shared with other applications and
    651   sites.</p></li>
    652 
    653  </ul>
    654 
    655 
    656 
    657 
    658  <h3>HTML vs XHTML</h3>
    659 
    660  <!-- NON-NORMATIVE SECTION -->
    661 
    662  <p>This specification defines an abstract language for describing documents and applications, and
    663  some APIs for interacting with in-memory representations of resources that use this language.</p>
    664 
    665  <p>The in-memory representation is known as "DOM HTML", or "the DOM" for short.</p>
    666 
    667  <p>There are various concrete syntaxes that can be used to transmit resources that use this
    668  abstract language, two of which are defined in this specification.</p>
    669 
    670  <p>The first such concrete syntax is the HTML syntax. This is the format suggested for most
    671  authors. It is compatible with most legacy Web browsers. If a document is transmitted with the
    672  <code>text/html</code> <span>MIME type</span>, then it will be processed as an HTML document by
    673  Web browsers. This specification defines the latest HTML syntax, known simply as "HTML".</p>
    674 
    675  <p>The second concrete syntax is the XHTML syntax, which is an application of XML. When a document
    676  is transmitted with an <span>XML MIME type</span>, such as <code>application/xhtml+xml</code>,
    677  then it is treated as an XML document by Web browsers, to be parsed by an XML processor. Authors
    678  are reminded that the processing for XML and HTML differs; in particular, even minor syntax errors
    679  will prevent a document labeled as XML from being rendered fully, whereas they would be ignored in
    680  the HTML syntax. This specification defines the latest XHTML syntax, known simply as "XHTML".</p>
    681 
    682  <p>The DOM, the HTML syntax, and the XHTML syntax cannot all represent the same content. For
    683  example, namespaces cannot be represented using the HTML syntax, but they are supported in the DOM
    684  and in the XHTML syntax. Similarly, documents that use the <code>noscript</code> feature can be
    685  represented using the HTML syntax, but cannot be represented with the DOM or in the XHTML syntax.
    686  Comments that contain the string "<code data-x="">--&gt;</code>" can only be represented in the
    687  DOM, not in the HTML and XHTML syntaxes.</p>
    688 
    689 
    690  <h3>Structure of this specification</h3>
    691 
    692  <!-- NON-NORMATIVE SECTION -->
    693 
    694  <p>This specification is divided into the following major sections:</p>
    695 
    696  <dl>
    697 
    698 
    699   <dt><a href="#introduction">Introduction</a></dt>
    700 
    701   <dd>Non-normative materials providing a context for the HTML standard.</dd>
    702 
    703 
    704   <dt><a href="#infrastructure">Common infrastructure</a></dt>
    705 
    706   <dd>The conformance classes, algorithms, definitions, and the common underpinnings of the rest of
    707   the specification.</dd>
    708 
    709 
    710   <dt><a href="#dom">Semantics, structure, and APIs of HTML documents</a></dt>
    711 
    712   <dd>Documents are built from elements. These elements form a tree using the DOM. This section
    713   defines the features of this DOM, as well as introducing the features common to all elements, and
    714   the concepts used in defining elements.</dd>
    715 
    716 
    717   <dt><a href="#semantics">The elements of HTML</a></dt>
    718 
    719   <dd>Each element has a predefined meaning, which is explained in this section. Rules for authors
    720   on how to use the element<span class="nodev">, along with user agent requirements for how to
    721   handle each element,</span> are also given. This includes large signature features of HTML such
    722   as video playback and subtitles, form controls and form submission, and a 2D graphics API known
    723   as the HTML canvas.</dd>
    724 
    725 
    726   <dt><a href="#microdata">Microdata</a></dt>
    727 
    728   <dd>This specification introduces a mechanism for adding machine-readable annotations to
    729   documents, so that tools can extract trees of name-value pairs from the document. This section
    730   describes this mechanism<span class="nodev"> and some algorithms that can be used to convert HTML
    731   documents into other formats</span>. This section also defines some sample Microdata vocabularies
    732   for contact information, calendar events, and licensing works.</dd>
    733 
    734 
    735   <dt><a href="#editing">User interaction</a></dt>
    736 
    737   <dd>HTML documents can provide a number of mechanisms for users to interact with and modify
    738   content, which are described in this section, such as how focus works, and drag-and-drop.</dd>
    739 
    740 
    741   <dt><a href="#browsers">Loading Web pages</a></dt>
    742 
    743   <dd>HTML documents do not exist in a vacuum &mdash; this section defines many of the features
    744   that affect environments that deal with multiple pages, such as Web browsers and offline
    745   caching of Web applications.</dd>
    746 
    747 
    748   <dt><a href="#webappapis">Web application APIs</a></dt>
    749 
    750   <dd>This section introduces basic features for scripting of applications in HTML.</dd>
    751 
    752 
    753   <dt><a href="#workers">Web workers</a></dt>
    754 
    755   <dd>This section defines an API for background threads in JavaScript.</dd>
    756 
    757 
    758   <dt><a href="#comms">The communication APIs</a></dt>
    759 
    760   <dd>This section describes some mechanisms that applications written in HTML can use to
    761   communicate with other applications from different domains running on the same client. It also
    762   introduces a server-push event stream mechanism known as Server-Sent Events or
    763   <code>EventSource</code>, and a two-way full-duplex socket protocol for scripts known as Web
    764   Sockets.
    765 
    766   </dd>
    767 
    768 
    769   <dt><a href="#webstorage">Web storage</a></dt>
    770 
    771   <dd>This section defines a client-side storage mechanism based on name-value pairs.</dd>
    772 
    773 
    774   <dt><a href="#syntax">The HTML syntax</a></dt>
    775   <dt><a href="#xhtml">The XHTML syntax</a></dt>
    776 
    777   <dd>All of these features would be for naught if they couldn't be represented in a serialized
    778   form and sent to other people, and so these sections define the syntaxes of HTML and XHTML<span
    779   class="nodev">, along with rules for how to parse content using those syntaxes</span>.</dd>
    780 
    781 
    782   <dt><a href="#rendering">Rendering</a></dt>
    783 
    784   <dd>This section defines the default rendering rules for Web browsers.</dd>
    785 
    786 
    787  </dl>
    788 
    789  <p>There are also some appendices, listing <a href="#obsolete">obsolete features</a> and <a
    790  href="#iana">IANA considerations</a>, and several indices.</p>
    791 
    792 
    793 
    794  <h4>How to read this specification</h4>
    795 
    796  <p>This specification should be read like all other specifications. First, it should be read
    797  cover-to-cover, multiple times. Then, it should be read backwards at least once. Then it should be
    798  read by picking random sections from the contents list and following all the cross-references.</p>
    799 
    800  <p>As described in the conformance requirements section below, this specification describes
    801  conformance criteria for a variety of conformance classes. In particular, there are conformance
    802  requirements that apply to <em>producers</em>, for example authors and the documents they create,
    803  and there are conformance requirements that apply to <em>consumers</em>, for example Web browsers.
    804  They can be distinguished by what they are requiring: a requirement on a producer states what is
    805  allowed, while a requirement on a consumer states how software is to act.</p>
    806 
    807  <div class="example">
    808 
    809   <p>For example, "the <code data-x="">foo</code> attribute's value must be a <span>valid
    810   integer</span>" is a requirement on producers, as it lays out the allowed values; in contrast,
    811   the requirement "the <code data-x="">foo</code> attribute's value must be parsed using the
    812   <span>rules for parsing integers</span>" is a requirement on consumers, as it describes how to
    813   process the content.</p>
    814 
    815  </div>
    816 
    817  <p><strong>Requirements on producers have no bearing whatsoever on consumers.</strong></p>
    818 
    819  <div class="example">
    820 
    821   <p>Continuing the above example, a requirement stating that a particular attribute's value is
    822   constrained to being a <span>valid integer</span> emphatically does <em>not</em> imply anything
    823   about the requirements on consumers. It might be that the consumers are in fact required to treat
    824   the attribute as an opaque string, completely unaffected by whether the value conforms to the
    825   requirements or not. It might be (as in the previous example) that the consumers are required to
    826   parse the value using specific rules that define how invalid (non-numeric in this case) values
    827   are to be processed.</p>
    828 
    829  </div>
    830 
    831 
    832 
    833  <h4>Typographic conventions</h4>
    834 
    835  <p>This is a definition, requirement, or explanation.</p>
    836 
    837  <p class="note">This is a note.</p>
    838 
    839  <p class="example">This is an example.</p>
    840 
    841  <p class="&#x0058;&#x0058;&#x0058;">This is an open issue.</p>
    842 
    843  <p class="warning">This is a warning.</p>
    844 
    845  <pre class="idl extract">interface <dfn data-x="">Example</dfn> {
    846  // this is an IDL definition
    847 };</pre>
    848 
    849  <dl class="domintro">
    850 
    851   <dt><var data-x="">variable</var> = <var data-x="">object</var> . <code data-x="">method</code>( [ <var data-x="">optionalArgument</var> ] )</dt>
    852 
    853   <dd>
    854 
    855    <p>This is a note to authors describing the usage of an interface.</p>
    856 
    857   </dd>
    858 
    859  </dl>
    860 
    861  <pre class="css">/* this is a CSS fragment */</pre>
    862 
    863  <p>The defining instance of a term is marked up like <dfn data-x="x-this">this</dfn>. Uses of that
    864  term are marked up like <span data-x="x-this">this</span> or like <i data-x="x-this">this</i>.</p>
    865 
    866  <p>The defining instance of an element, attribute, or API is marked up like <dfn
    867  data-x="x-that"><code>this</code></dfn>. References to that element, attribute, or API are marked
    868  up like <code data-x="x-that">this</code>.</p>
    869 
    870  <p>Other code fragments are marked up <code data-x="">like this</code>.</p>
    871 
    872  <p>Variables are marked up like <var data-x="">this</var>.</p>
    873 
    874  <p>In an algorithm, steps in <span data-x="synchronous section">synchronous sections</span> are
    875  marked with &#x231B;.</p>
    876 
    877  <p>In some cases, requirements are given in the form of lists with conditions and corresponding
    878  requirements. In such cases, the requirements that apply to a condition are always the first set
    879  of requirements that follow the condition, even in the case of there being multiple sets of
    880  conditions for those requirements. Such cases are presented as follows:</p>
    881 
    882  <dl class="switch">
    883 
    884   <dt>This is a condition
    885   <dt>This is another condition
    886   <dd>This is the requirement that applies to the conditions above.
    887 
    888   <dt>This is a third condition
    889   <dd>This is the requirement that applies to the third condition.
    890 
    891  </dl>
    892 
    893 
    894 
    895  <h3 id="fingerprint">Privacy concerns</h3>
    896 
    897  <!-- NON-NORMATIVE SECTION -->
    898 
    899  <p>Some features of HTML trade user convenience for a measure of user privacy.</p>
    900 
    901  <p>In general, due to the Internet's architecture, a user can be distinguished from another by the
    902  user's IP address. IP addresses do not perfectly match to a user; as a user moves from device to
    903  device, or from network to network, their IP address will change; similarly, NAT routing, proxy
    904  servers, and shared computers enable packets that appear to all come from a single IP address to
    905  actually map to multiple users. Technologies such as onion routing can be used to further
    906  anonymise requests so that requests from a single user at one node on the Internet appear to come
    907  from many disparate parts of the network.</p>
    908 
    909  <p>However, the IP address used for a user's requests is not the only mechanism by which a user's
    910  requests could be related to each other. Cookies, for example, are designed specifically to enable
    911  this, and are the basis of most of the Web's session features that enable you to log into a site
    912  with which you have an account.</p>
    913 
    914  <p>There are other mechanisms that are more subtle. Certain characteristics of a user's system can
    915  be used to distinguish groups of users from each other; by collecting enough such information, an
    916  individual user's browser's "digital fingerprint" can be computed, which can be as good as, if not
    917  better than, an IP address in ascertaining which requests are from the same user.</p>
    918 
    919  <p>Grouping requests in this manner, especially across multiple sites, can be used for both benign
    920  (and even arguably positive) purposes, as well as for malevolent purposes. An example of a
    921  reasonably benign purpose would be determining whether a particular person seems to prefer sites
    922  with dog illustrations as opposed to sites with cat illustrations (based on how often they visit
    923  the sites in question) and then automatically using the preferred illustrations on subsequent
    924  visits to participating sites. Malevolent purposes, however, could include governments combining
    925  information such as the person's home address (determined from the addresses they use when getting
    926  driving directions on one site) with their apparent political affiliations (determined by
    927  examining the forum sites that they participate in) to determine whether the person should be
    928  prevented from voting in an election.</p>
    929 
    930  <p>Since the malevolent purposes can be remarkably evil, user agent implementors are encouraged to
    931  consider how to provide their users with tools to minimise leaking information that could be used
    932  to fingerprint a user.</p>
    933 
    934  <p>Unfortunately, as the first paragraph in this section implies, sometimes there is great benefit
    935  to be derived from exposing the very information that can also be used for fingerprinting
    936  purposes, so it's not as easy as simply blocking all possible leaks. For instance, the ability to
    937  log into a site to post under a specific identity requires that the user's requests be
    938  identifiable as all being from the same user, more or less by definition. More subtly, though,
    939  information such as how wide text is, which is necessary for many effects that involve drawing
    940  text onto a canvas (e.g. any effect that involves drawing a border around the text) also leaks
    941  information that can be used to group a user's requests. (In this case, by potentially exposing,
    942  via a brute force search, which fonts a user has installed, information which can vary
    943  considerably from user to user.)</p>
    944 
    945  <p>Features in this specification which can be <dfn data-x="fingerprinting vector">used to
    946  fingerprint the user</dfn> are marked as this paragraph is.
    947  <!--INSERT FINGERPRINT-->
    948  </p>
    949 
    950  <p>Other features in the platform can be used for the same purpose, though, including, but not
    951  limited to:</p>
    952 
    953  <ul>
    954 
    955   <li>The exact list of which features a user agent supports.</li>
    956 
    957   <li>The maximum allowed stack depth for recursion in script.</li>
    958 
    959   <li>Features that describe the user's environment, like Media Queries and the <code>Screen</code>
    960   object. <a href="#refsMQ">[MQ]</a> <a href="#refsCSSOMVIEW">[CSSOMVIEW]</a></li>
    961 
    962   <li>The user's time zone.</li>
    963 
    964  </ul>
    965 
    966 
    967 
    968  <h3>A quick introduction to HTML</h3>
    969 
    970  <!-- NON-NORMATIVE SECTION -->
    971 
    972  <p>A basic HTML document looks like this:</p>
    973 
    974  <pre id="intro-early-example">&lt;!DOCTYPE html>
    975 &lt;html>
    976 &lt;head>
    977  &lt;title>Sample page&lt;/title>
    978 &lt;/head>
    979 &lt;body>
    980  &lt;h1>Sample page&lt;/h1>
    981  &lt;p>This is a &lt;a href="demo.html">simple&lt;/a> sample.&lt;/p>
    982  &lt;!-- this is a comment -->
    983 &lt;/body>
    984 &lt;/html></pre>
    985 
    986  <p>HTML documents consist of a tree of elements and text. Each element is denoted in the source by
    987  a <span data-x="syntax-start-tag">start tag</span>, such as "<code data-x="">&lt;body></code>", and
    988  an <span data-x="syntax-end-tag">end tag</span>, such as "<code data-x="">&lt;/body></code>".
    989  (Certain start tags and end tags can in certain cases be <span
    990  data-x="syntax-tag-omission">omitted</span> and are implied by other tags.)</p>
    991 
    992  <p>Tags have to be nested such that elements are all completely within each other, without
    993  overlapping:</p>
    994 
    995  <pre class="bad">&lt;p>This is &lt;em>very &lt;strong>wrong&lt;/em>!&lt;/strong>&lt;/p></pre>
    996  <pre>&lt;p>This &lt;em>is &lt;strong>correct&lt;/strong>.&lt;/em>&lt;/p></pre>
    997 
    998  <p>This specification defines a set of elements that can be used in HTML, along with rules about
    999  the ways in which the elements can be nested.</p>
   1000 
   1001  <p>Elements can have attributes, which control how the elements work. In the example below, there
   1002  is a <span>hyperlink</span>, formed using the <code>a</code> element and its <code
   1003  data-x="attr-hyperlink-href">href</code> attribute:</p>
   1004 
   1005  <pre>&lt;a href="demo.html">simple&lt;/a></pre>
   1006 
   1007  <p><span data-x="syntax-attributes">Attributes</span> are placed inside the start tag, and consist
   1008  of a <span data-x="syntax-attribute-name">name</span> and a <span
   1009  data-x="syntax-attribute-value">value</span>, separated by an "<code data-x="">=</code>" character.
   1010  The attribute value can remain <a href="#unquoted">unquoted</a> if it doesn't contain <span
   1011  data-x="space character">space characters</span> or any of <code data-x="">"</code> <code
   1012  data-x="">'</code> <code data-x="">`</code> <code data-x="">=</code> <code data-x="">&lt;</code> or
   1013  <code data-x="">&gt;</code>. Otherwise, it has to be quoted using either single or double quotes.
   1014  The value, along with the "<code data-x="">=</code>" character, can be omitted altogether if the
   1015  value is the empty string.</p>
   1016 
   1017  <pre>&lt;!-- empty attributes -->
   1018 &lt;input name=address disabled>
   1019 &lt;input name=address disabled="">
   1020 
   1021 &lt;!-- attributes with a value -->
   1022 &lt;input name=address maxlength=200>
   1023 &lt;input name=address maxlength='200'>
   1024 &lt;input name=address maxlength="200"></pre>
   1025 
   1026  <p>HTML user agents (e.g. Web browsers) then <i>parse</i> this markup, turning it into a DOM
   1027  (Document Object Model) tree. A DOM tree is an in-memory representation of a document.</p>
   1028 
   1029  <p>DOM trees contain several kinds of nodes, in particular a <code>DocumentType</code> node,
   1030  <code>Element</code> nodes, <code>Text</code> nodes, <code>Comment</code> nodes, and in some cases
   1031  <code>ProcessingInstruction</code> nodes.</p>
   1032 
   1033  <p>The <a href="#intro-early-example">markup snippet at the top of this section</a> would be
   1034  turned into the following DOM tree:</p>
   1035 
   1036  <ul class="domTree"><li class="t10">DOCTYPE: <code data-x="">html</code></li><li class="t1"><code>html</code><ul><li class="t1"><code>head</code><ul><li class="t3"><code>#text</code>: <span data-x="">&#x23CE;&#x2423;&#x2423;</span></li><li class="t1"><code>title</code><ul><li class="t3"><code>#text</code>: <span data-x="">Sample page</span></li></ul></li><li class="t3"><code>#text</code>: <span data-x="">&#x23CE;&#x2423;</span></li></ul></li><li class="t3"><code>#text</code>: <span data-x="">&#x23CE;&#x2423;</span></li><li class="t1"><code>body</code><ul><li class="t3"><code>#text</code>: <span data-x="">&#x23CE;&#x2423;&#x2423;</span></li><li class="t1"><code>h1</code><ul><li class="t3"><code>#text</code>: <span data-x="">Sample page</span></li></ul></li><li class="t3"><code>#text</code>: <span data-x="">&#x23CE;&#x2423;&#x2423;</span></li><li class="t1"><code>p</code><ul><li class="t3"><code>#text</code>: <span data-x="">This is a <!--grammar-check-override--></span></li><li class="t1"><code>a</code> <span data-x="" class="t2"><code class="attribute name">href</code>="<code class="attribute value">demo.html</code>"</span><ul><li class="t3"><code>#text</code>: <span data-x="">simple</span></li></ul></li><li class="t3"><code>#text</code>: <span data-x=""> sample.</span></li></ul></li><li class="t3"><code>#text</code>: <span data-x="">&#x23CE;&#x2423;&#x2423;</span></li><li class="t8"><code>#comment</code>: <span data-x=""> this is a comment </span></li><li class="t3"><code>#text</code>: <span data-x="">&#x23CE;&#x2423;&#x23CE;</span></li></ul></li></ul></li></ul>
   1037 
   1038  <p>The <span>root element</span> of this tree is the <code>html</code> element, which is the
   1039  element always found at the root of HTML documents. It contains two elements, <code>head</code>
   1040  and <code>body</code>, as well as a <code>Text</code> node between them.</p>
   1041 
   1042  <p>There are many more <code>Text</code> nodes in the DOM tree than one would initially expect,
   1043  because the source contains a number of spaces (represented here by "&#x2423;") and line breaks
   1044  ("&#x23CE;") that all end up as <code>Text</code> nodes in the DOM. However, for historical
   1045  reasons not all of the spaces and line breaks in the original markup appear in the DOM. In
   1046  particular, all the whitespace before the <code>head</code> start tag ends up being dropped
   1047  silently, and all the whitespace after the <code>body</code> end tag ends up placed at the end of
   1048  the <code>body</code>.</p>
   1049 
   1050  <p>The <code>head</code> element contains a <code>title</code> element, which itself contains a
   1051  <code>Text</code> node with the text "Sample page". Similarly, the <code>body</code> element
   1052  contains an <code>h1</code> element, a <code>p</code> element, and a comment.</p>
   1053 
   1054  <hr>
   1055 
   1056  <p>This DOM tree can be manipulated from scripts in the page. Scripts (typically in JavaScript)
   1057  are small programs that can be embedded using the <code>script</code> element or using <span>event
   1058  handler content attributes</span>. For example, here is a form with a script that sets the value
   1059  of the form's <code>output</code> element to say "Hello World":</p>
   1060 
   1061  <pre>&lt;<span>form</span> <span data-x="attr-form-name">name</span>="main">
   1062 Result: &lt;<span>output</span> <span data-x="attr-fe-name">name</span>="result">&lt;/output>
   1063 &lt;<span>script</span>>
   1064  <span data-x="Document">document</span>.<span data-x="dom-document-forms">forms</span>.main.<span data-x="dom-form-elements">elements</span>.result.<span data-x="dom-output-value">value</span> = 'Hello World';
   1065 &lt;/script>
   1066 &lt;/form></pre>
   1067 
   1068  <p>Each element in the DOM tree is represented by an object, and these objects have APIs so that
   1069  they can be manipulated. For instance, a link (e.g. the <code>a</code> element in the tree above)
   1070  can have its "<code data-x="attr-hyperlink-href">href</code>" attribute changed in several
   1071  ways:</p>
   1072 
   1073  <pre>var a = <span data-x="Document">document</span>.<span data-x="dom-document-links">links</span>[0]; // obtain the first link in the document
   1074 a.<span data-x="dom-url-href">href</span> = 'sample.html'; // change the destination URL of the link
   1075 a.<span data-x="dom-url-protocol">protocol</span> = 'https'; // change just the scheme part of the URL
   1076 a.setAttribute('href', 'http://example.com/'); // change the content attribute directly</pre>
   1077 
   1078  <p>Since DOM trees are used as the way to represent HTML documents when they are processed and
   1079  presented by implementations (especially interactive implementations like Web browsers), this
   1080  specification is mostly phrased in terms of DOM trees, instead of the markup described above.</p>
   1081 
   1082  <hr>
   1083 
   1084  <p>HTML documents represent a media-independent description of interactive content. HTML documents
   1085  might be rendered to a screen, or through a speech synthesiser, or on a braille display. To
   1086  influence exactly how such rendering takes place, authors can use a styling language such as
   1087  CSS.</p>
   1088 
   1089  <p>In the following example, the page has been made yellow-on-blue using CSS.</p>
   1090 
   1091  <pre>&lt;!DOCTYPE html>
   1092 &lt;html>
   1093 &lt;head>
   1094  &lt;title>Sample styled page&lt;/title>
   1095  &lt;style>
   1096   body { background: navy; color: yellow; }
   1097  &lt;/style>
   1098 &lt;/head>
   1099 &lt;body>
   1100  &lt;h1>Sample styled page&lt;/h1>
   1101  &lt;p>This page is just a demo.&lt;/p>
   1102 &lt;/body>
   1103 &lt;/html></pre>
   1104 
   1105  <p>For more details on how to use HTML, authors are encouraged to consult tutorials and guides.
   1106  Some of the examples included in this specification might also be of use, but the novice author is
   1107  cautioned that this specification, by necessity, defines the language with a level of detail that
   1108  might be difficult to understand at first.</p>
   1109 
   1110 
   1111 
   1112 <!--ADD-TOPIC:Security-->
   1113  <h4>Writing secure applications with HTML</h4>
   1114 
   1115  <!-- NON-NORMATIVE SECTION -->
   1116 
   1117  <p>When HTML is used to create interactive sites, care needs to be taken to avoid introducing
   1118  vulnerabilities through which attackers can compromise the integrity of the site itself or of the
   1119  site's users.</p>
   1120 
   1121  <p>A comprehensive study of this matter is beyond the scope of this document, and authors are
   1122  strongly encouraged to study the matter in more detail. However, this section attempts to provide
   1123  a quick introduction to some common pitfalls in HTML application development.</p>
   1124 
   1125  <p>The security model of the Web is based on the concept of "origins", and correspondingly many of
   1126  the potential attacks on the Web involve cross-origin actions. <a
   1127  href="#refsORIGIN">[ORIGIN]</a></p>
   1128 
   1129  <dl>
   1130 
   1131   <dt>Not validating user input</dt>
   1132   <dt>Cross-site scripting (XSS)</dt>
   1133   <dt>SQL injection</dt>
   1134 
   1135   <dd>
   1136 
   1137    <p>When accepting untrusted input, e.g. user-generated content such as text comments, values in
   1138    URL parameters, messages from third-party sites, etc, it is imperative that the data be
   1139    validated before use, and properly escaped when displayed. Failing to do this can allow a
   1140    hostile user to perform a variety of attacks, ranging from the potentially benign, such as
   1141    providing bogus user information like a negative age, to the serious, such as running scripts
   1142    every time a user looks at a page that includes the information, potentially propagating the
   1143    attack in the process, to the catastrophic, such as deleting all data on the server.</p>
   1144 
   1145    <p>When writing filters to validate user input, it is imperative that filters always be
   1146    whitelist-based, allowing known-safe constructs and disallowing all other input. Blacklist-based
   1147    filters that disallow known-bad inputs and allow everything else are not secure, as not
   1148    everything that is bad is yet known (for example, because it might be invented in the
   1149    future).</p>
   1150 
   1151    <div class="example">
   1152 
   1153     <p>For example, suppose a page looked at its URL's query string to determine what to display,
   1154     and the site then redirected the user to that page to display a message, as in:</p>
   1155 
   1156     <pre>&lt;ul>
   1157 &lt;li>&lt;a href="message.cgi?say=Hello">Say Hello&lt;/a>
   1158 &lt;li>&lt;a href="message.cgi?say=Welcome">Say Welcome&lt;/a>
   1159 &lt;li>&lt;a href="message.cgi?say=Kittens">Say Kittens&lt;/a>
   1160 &lt;/ul></pre>
   1161 
   1162     <p>If the message was just displayed to the user without escaping, a hostile attacker could
   1163     then craft a URL that contained a script element:</p>
   1164 
   1165     <pre>http://example.com/message.cgi?say=%3Cscript%3Ealert%28%27Oh%20no%21%27%29%3C/script%3E</pre>
   1166 
   1167     <p>If the attacker then convinced a victim user to visit this page, a script of the attacker's
   1168     choosing would run on the page. Such a script could do any number of hostile actions, limited
   1169     only by what the site offers: if the site is an e-commerce shop, for instance, such a script
   1170     could cause the user to unknowingly make arbitrarily many unwanted purchases.</p>
   1171 
   1172     <p>This is called a cross-site scripting attack.</p>
   1173 
   1174    </div>
   1175 
   1176    <p>There are many constructs that can be used to try to trick a site into executing code. Here
   1177    are some that authors are encouraged to consider when writing whitelist filters:</p>
   1178 
   1179    <ul>
   1180 
   1181     <li>When allowing harmless-seeming elements like <code>img</code>, it is important to whitelist
   1182     any provided attributes as well. If one allowed all attributes then an attacker could, for
   1183     instance, use the <code data-x="handler-onload">onload</code> attribute to run arbitrary
   1184     script.</li>
   1185 
   1186     <li>When allowing URLs to be provided (e.g. for links), the scheme of each URL also needs to be
   1187     explicitly whitelisted, as there are many schemes that can be abused. The most prominent
   1188     example is "<code data-x="javascript-protocol">javascript:</code>", but user agents can
   1189     implement (and indeed, have historically implemented) others.</li> <!-- IE had vbscript:,
   1190     Netscape had livescript:, etc. -->
   1191 
   1192     <li>Allowing a <code>base</code> element to be inserted means any <code>script</code> elements
   1193     in the page with relative links can be hijacked, and similarly that any form submissions can
   1194     get redirected to a hostile site.</li>
   1195 
   1196    </ul>
   1197 
   1198   </dd>
   1199 
   1200 
   1201   <dt>Cross-site request forgery (CSRF)</dt>
   1202 
   1203   <dd>
   1204 
   1205    <p>If a site allows a user to make form submissions with user-specific side-effects, for example
   1206    posting messages on a forum under the user's name, making purchases, or applying for a passport,
   1207    it is important to verify that the request was made by the user intentionally, rather than by
   1208    another site tricking the user into making the request unknowingly.</p>
   1209 
   1210    <p>This problem exists because HTML forms can be submitted to other origins.</p>
   1211 
   1212    <p>Sites can prevent such attacks by populating forms with user-specific hidden tokens, or by
   1213    checking <code data-x="http-origin">Origin</code> headers on all requests.</p>
   1214 
   1215   </dd>
   1216 
   1217 
   1218 
   1219   <dt>Clickjacking</dt>
   1220 
   1221   <dd>
   1222 
   1223    <p>A page that provides users with an interface to perform actions that the user might not wish
   1224    to perform needs to be designed so as to avoid the possibility that users can be tricked into
   1225    activating the interface.</p>
   1226 
   1227    <p>One way that a user could be so tricked is if a hostile site places the victim site in a
   1228    small <code>iframe</code> and then convinces the user to click, for instance by having the user
   1229    play a reaction game. Once the user is playing the game, the hostile site can quickly position
   1230    the iframe under the mouse cursor just as the user is about to click, thus tricking the user
   1231    into clicking the victim site's interface.</p>
   1232 
   1233    <p>To avoid this, sites that do not expect to be used in frames are encouraged to only enable
   1234    their interface if they detect that they are not in a frame (e.g. by comparing the <code
   1235    data-x="dom-window">window</code> object to the value of the <code data-x="dom-top">top</code>
   1236    attribute).</p>
   1237 
   1238   </dd>
   1239 
   1240  </dl>
   1241 <!--REMOVE-TOPIC:Security-->
   1242 
   1243 
   1244  <h4>Common pitfalls to avoid when using the scripting APIs</h4>
   1245 
   1246  <!-- NON-NORMATIVE SECTION -->
   1247 
   1248  <p>Scripts in HTML have "run-to-completion" semantics, meaning that the browser will generally run
   1249  the script uninterrupted before doing anything else, such as firing further events or continuing
   1250  to parse the document.</p>
   1251 
   1252  <p>On the other hand, parsing of HTML files happens asynchronously and incrementally, meaning that
   1253  the parser can pause at any point to let scripts run. This is generally a good thing, but it does
   1254  mean that authors need to be careful to avoid hooking event handlers after the events could have
   1255  possibly fired.</p>
   1256 
   1257  <p>There are two techniques for doing this reliably: use <span>event handler content
   1258  attributes</span>, or create the element and add the event handlers in the same script. The latter
   1259  is safe because, as mentioned earlier, scripts are run to completion before further events can
   1260  fire.</p>
   1261 
   1262  <div class="example">
   1263 
   1264   <p>One way this could manifest itself is with <code>img</code> elements and the <code
   1265   data-x="event-load">load</code> event. The event could fire as soon as the element has been
   1266   parsed, especially if the image has already been cached (which is common).</p>
   1267 
   1268   <p>Here, the author uses the <code data-x="handler-onload">onload</code> handler on an
   1269   <code>img</code> element to catch the <code data-x="event-load">load</code> event:</p>
   1270 
   1271   <pre>&lt;img src="games.png" alt="Games" onload="gamesLogoHasLoaded(event)"></pre>
   1272 
   1273   <p>If the element is being added by script, then so long as the event handlers are added in the
   1274   same script, the event will still not be missed:</p>
   1275 
   1276   <pre>&lt;script>
   1277 var img = new Image();
   1278 img.src = 'games.png';
   1279 img.alt = 'Games';
   1280 img.onload = gamesLogoHasLoaded;
   1281 // img.addEventListener('load', gamesLogoHasLoaded, false); // would work also
   1282 &lt;/script></pre>
   1283 
   1284   <p>However, if the author first created the <code>img</code> element and then in a separate
   1285   script added the event listeners, there's a chance that the <code data-x="event-load">load</code>
   1286   event would be fired in between, leading it to be missed:</p>
   1287 
   1288   <pre class="bad">&lt;!-- Do not use this style, it has a race condition! -->
   1289 &lt;img id="games" src="games.png" alt="Games">
   1290 &lt;!-- the 'load' event might fire here while the parser is taking a
   1291      break, in which case you will not see it! -->
   1292 &lt;script>
   1293  var img = document.getElementById('games');
   1294  img.onload = gamesLogoHasLoaded; // might never fire!
   1295 &lt;/script></pre>
   1296 
   1297  </div>
   1298 
   1299 
   1300 
   1301  <h4>How to catch mistakes when writing HTML: validators and conformance checkers</h4>
   1302 
   1303  <!-- NON-NORMATIVE SECTION -->
   1304 
   1305  <p>Authors are encouraged to make use of conformance checkers (also known as <i>validators</i>) to
   1306  catch common mistakes. The WHATWG maintains a list of such tools at: <a
   1307  href="http://validator.whatwg.org/">http://validator.whatwg.org/</a></p>
   1308 
   1309 
   1310 
   1311  <h3>Conformance requirements for authors</h3>
   1312 
   1313  <!-- NON-NORMATIVE SECTION -->
   1314 
   1315  <p>Unlike previous versions of the HTML specification, this specification defines in some detail
   1316  the required processing for invalid documents as well as valid documents.</p> <!-- This has led to
   1317  some questioning the purpose of conformance criteria: if there is no ambiguity in how something
   1318  will be processed, why disallow it? -->
   1319 
   1320  <p>However, even though the processing of invalid content is in most cases well-defined,
   1321  conformance requirements for documents are still important: in practice, interoperability (the
   1322  situation in which all implementations process particular content in a reliable and identical or
   1323  equivalent way) is not the only goal of document conformance requirements. This section details
   1324  some of the more common reasons for still distinguishing between a conforming document and one
   1325  with errors.</p>
   1326 
   1327 
   1328  <h4>Presentational markup</h4>
   1329 
   1330  <!-- NON-NORMATIVE SECTION -->
   1331 
   1332  <p>The majority of presentational features from previous versions of HTML are no longer allowed.
   1333  Presentational markup in general has been found to have a number of problems:</p>
   1334 
   1335  <dl>
   1336 
   1337   <dt>The use of presentational elements leads to poorer accessibility</dt>
   1338 
   1339   <dd>
   1340 
   1341    <p>While it is possible to use presentational markup in a way that provides users of assistive
   1342    technologies (ATs) with an acceptable experience (e.g. using ARIA), doing so is significantly
   1343    more difficult than doing so when using semantically-appropriate markup. Furthermore, even using
   1344    such techniques doesn't help make pages accessible for non-AT non-graphical users, such as users
   1345    of text-mode browsers.</p>
   1346 
   1347    <p>Using media-independent markup, on the other hand, provides an easy way for documents to be
   1348    authored in such a way that they work for more users (e.g. users of text browsers).</p>
   1349 
   1350   </dd>
   1351 
   1352 
   1353   <dt>Higher cost of maintenance</dt>
   1354 
   1355   <dd>
   1356 
   1357    <p>It is significantly easier to maintain a site written in such a way that the markup is
   1358    style-independent. For example, changing the colour of a site that uses
   1359    <code>&lt;font&nbsp;color=""></code> throughout requires changes across the entire site, whereas
   1360    a similar change to a site based on CSS can be done by changing a single file.</p>
   1361 
   1362   </dd>
   1363 
   1364 
   1365   <dt>Larger document sizes</dt>
   1366 
   1367   <dd>
   1368 
   1369    <p>Presentational markup tends to be much more redundant, and thus results in larger document
   1370    sizes.</p>
   1371 
   1372   </dd>
   1373 
   1374  </dl>
   1375 
   1376  <p>For those reasons, presentational markup has been removed from HTML in this version. This
   1377  change should not come as a surprise; HTML4 deprecated presentational markup many years ago and
   1378  provided a mode (HTML4 Transitional) to help authors move away from presentational markup; later,
   1379  XHTML 1.1 went further and obsoleted those features altogether.</p>
   1380 
   1381  <p>The only remaining presentational markup features in HTML are the <code
   1382  data-x="attr-style">style</code> attribute and the <code>style</code> element. Use of the <code
   1383  data-x="attr-style">style</code> attribute is somewhat discouraged in production environments, but
   1384  it can be useful for rapid prototyping (where its rules can be directly moved into a separate
   1385  style sheet later) and for providing specific styles in unusual cases where a separate style sheet
   1386  would be inconvenient. Similarly, the <code>style</code> element can be useful in syndication or
   1387  for page-specific styles, but in general an external style sheet is likely to be more convenient
   1388  when the styles apply to multiple pages.</p>
   1389 
   1390  <p>It is also worth noting that some elements that were previously presentational have been
   1391  redefined in this specification to be media-independent: <code>b</code>, <code>i</code>,
   1392  <code>hr</code>, <code>s</code>, <code>small</code>, and <code>u</code>.</p>
   1393 
   1394 
   1395  <h4>Syntax errors</h4>
   1396 
   1397  <!-- NON-NORMATIVE SECTION -->
   1398 
   1399  <p>The syntax of HTML is constrained to avoid a wide variety of problems.</p>
   1400 
   1401  <dl>
   1402 
   1403   <dt>Unintuitive error-handling behavior</dt>
   1404 
   1405   <dd>
   1406 
   1407    <p>Certain invalid syntax constructs, when parsed, result in DOM trees that are highly
   1408    unintuitive.</p>
   1409 
   1410    <div class="example">
   1411 
   1412     <p>For example, the following markup fragment results in a DOM with an <code>hr</code> element
   1413     that is an <em>earlier</em> sibling of the corresponding <code>table</code> element:</p>
   1414 
   1415     <pre class="bad">&lt;table>&lt;hr>...</pre>
   1416 
   1417    </div>
   1418 
   1419   </dd>
   1420 
   1421 
   1422   <dt>Errors with optional error recovery</dt>
   1423 
   1424   <dd>
   1425 
   1426    <p>To allow user agents to be used in controlled environments without having to implement the
   1427    more bizarre and convoluted error handling rules, user agents are permitted to fail whenever
   1428    encountering a <span>parse error</span>.</p>
   1429 
   1430   </dd>
   1431 
   1432 
   1433   <dt>Errors where the error-handling behavior is not compatible with streaming user agents</dt>
   1434 
   1435   <dd>
   1436 
   1437    <p>Some error-handling behavior, such as the behavior for the <code
   1438    data-x="">&lt;table>&lt;hr>...</code> example mentioned above, is incompatible with streaming
   1439    user agents (user agents that process HTML files in one pass, without storing state). To avoid
   1440    interoperability problems with such user agents, any syntax resulting in such behavior is
   1441    considered invalid.</p>
   1442 
   1443   </dd>
   1444 
   1445 
   1446   <dt>Errors that can result in infoset coercion</dt>
   1447 
   1448   <dd>
   1449 
   1450    <p>When a user agent based on XML is connected to an HTML parser, it is possible that certain
   1451    invariants that XML enforces, such as comments never containing two consecutive hyphens, will be
   1452    violated by an HTML file. Handling this can require that the parser coerce the HTML DOM into an
   1453    XML-compatible infoset. Most syntax constructs that require such handling are considered
   1454    invalid.</p>
   1455 
   1456   </dd>
   1457 
   1458 
   1459   <dt>Errors that result in disproportionally poor performance</dt>
   1460 
   1461   <dd>
   1462 
   1463    <p>Certain syntax constructs can result in disproportionally poor performance. To discourage the
   1464    use of such constructs, they are typically made non-conforming.</p>
   1465 
   1466    <div class="example">
   1467 
   1468     <p>For example, the following markup results in poor performance, since all the unclosed
   1469     <code>i</code> elements have to be reconstructed in each paragraph, resulting in progressively
   1470     more elements in each paragraph:</p>
   1471 
   1472     <pre class="bad">&lt;p>&lt;i>He dreamt.
   1473 &lt;p>&lt;i>He dreamt that he ate breakfast.
   1474 &lt;p>&lt;i>Then lunch.
   1475 &lt;p>&lt;i>And finally dinner.</pre>
   1476 
   1477     <p>The resulting DOM for this fragment would be:</p>
   1478 
   1479     <ul class="domTree"><li class="t1"><code>p</code><ul><li class="t1"><code>i</code><ul><li class="t3"><code>#text</code>: <span data-x="">He dreamt.</span></li></ul></li></ul></li><li class="t1"><code>p</code><ul><li class="t1"><code>i</code><ul><li class="t1"><code>i</code><ul><li class="t3"><code>#text</code>: <span data-x="">He dreamt that he ate breakfast.</span></li></ul></li></ul></li></ul></li><li class="t1"><code>p</code><ul><li class="t1"><code>i</code><ul><li class="t1"><code>i</code><ul><li class="t1"><code>i</code><ul><li class="t3"><code>#text</code>: <span data-x="">Then lunch.</span></li></ul></li></ul></li></ul></li></ul></li><li class="t1"><code>p</code><ul><li class="t1"><code>i</code><ul><li class="t1"><code>i</code><ul><li class="t1"><code>i</code><ul><li class="t1"><code>i</code><ul><li class="t3"><code>#text</code>: <span data-x="">And finally dinner.</span></li></ul></li></ul></li></ul></li></ul></li></ul></li></ul>
   1480 
   1481    </div>
   1482 
   1483   </dd>
   1484 
   1485 
   1486   <dt>Errors involving fragile syntax constructs</dt>
   1487 
   1488   <dd>
   1489 
   1490    <p>There are syntax constructs that, for historical reasons, are relatively fragile. To help
   1491    reduce the number of users who accidentally run into such problems, they are made
   1492    non-conforming.</p>
   1493 
   1494    <div class="example">
   1495 
   1496     <p>For example, the parsing of certain named character references in attributes happens even
   1497     with the closing semicolon being omitted. It is safe to include an ampersand followed by
   1498     letters that do not form a named character reference, but if the letters are changed to a
   1499     string that <em>does</em> form a named character reference, they will be interpreted as that
   1500     character instead.</p>
   1501 
   1502     <p>In this fragment, the attribute's value is "<code data-x="">?bill&amp;ted</code>":</p>
   1503 
   1504     <pre class="bad">&lt;a href="?bill&amp;ted">Bill and Ted&lt;/a></pre>
   1505 
   1506     <p>In the following fragment, however, the attribute's value is actually "<code
   1507     data-x="">?art&copy;</code>", <em>not</em> the intended "<code data-x="">?art&amp;copy</code>",
   1508     because even without the final semicolon, "<code data-x="">&amp;copy</code>" is handled the same
   1509     as "<code data-x="">&amp;copy;</code>" and thus gets interpreted as "<code
   1510     data-x="">&copy;</code>":</p>
   1511 
   1512     <pre class="bad">&lt;a href="?art&amp;copy">Art and Copy&lt;/a></pre>
   1513 
   1514     <p>To avoid this problem, all named character references are required to end with a semicolon,
   1515     and uses of named character references without a semicolon are flagged as errors.</p>
   1516 
   1517     <p>Thus, the correct way to express the above cases is as
   1518     follows:</p>
   1519 
   1520     <pre>&lt;a href="?bill&amp;ted">Bill and Ted&lt;/a> &lt;!-- &amp;ted is ok, since it's not a named character reference --></pre>
   1521     <pre>&lt;a href="?art&amp;amp;copy">Art and Copy&lt;/a> &lt;!-- the &amp; has to be escaped, since &amp;copy <em>is</em> a named character reference --></pre>
   1522 
   1523    </div>
   1524 
   1525   </dd>
   1526 
   1527 
   1528   <dt>Errors involving known interoperability problems in legacy user agents</dt>
   1529 
   1530   <dd>
   1531 
   1532    <p>Certain syntax constructs are known to cause especially subtle or serious problems in legacy
   1533    user agents, and are therefore marked as non-conforming to help authors avoid them.</p>
   1534 
   1535    <div class="example">
   1536 
   1537     <p>For example, this is why the U+0060 GRAVE ACCENT character (`) is not allowed in unquoted
   1538     attributes. In certain legacy user agents, <!-- namely IE --> it is sometimes treated as a
   1539     quote character.</p>
   1540 
   1541    </div>
   1542 
   1543    <div class="example">
   1544 
   1545     <p>Another example of this is the DOCTYPE, which is required to trigger <span>no-quirks
   1546     mode</span>, because the behavior of legacy user agents in <span>quirks mode</span> is often
   1547     largely undocumented.</p>
   1548 
   1549    </div>
   1550 
   1551   </dd>
   1552 
   1553 
   1554 <!--ADD-TOPIC:Security-->
   1555   <dt>Errors that risk exposing authors to security attacks</dt>
   1556 
   1557   <dd>
   1558 
   1559    <p>Certain restrictions exist purely to avoid known security problems.</p>
   1560 
   1561    <div class="example">
   1562 
   1563     <p>For example, the restriction on using UTF-7 exists purely to avoid authors falling prey to a
   1564     known cross-site-scripting attack using UTF-7. <a href="#refsUTF7">[UTF7]</a></p>
   1565 
   1566    </div>
   1567 
   1568   </dd>
   1569 <!--REMOVE-TOPIC:Security-->
   1570 
   1571 
   1572   <dt>Cases where the author's intent is unclear</dt>
   1573 
   1574   <dd>
   1575 
   1576    <p>Markup where the author's intent is very unclear is often made non-conforming. Correcting
   1577    these errors early makes later maintenance easier.</p>
   1578 
   1579    <div class="example">
   1580 
   1581     <p>For example, it is unclear whether the author intended the following to be an
   1582     <code>h1</code> heading or an <code>h2</code> heading:</p>
   1583 
   1584     <pre class="bad">&lt;h1>Contact details&lt;/h2></pre>
   1585 
   1586    </div>
   1587 
   1588   </dd>
   1589 
   1590 
   1591   <dt>Cases that are likely to be typos</dt>
   1592 
   1593   <dd>
   1594 
   1595    <p>When an author makes a simple typo, it is helpful if the error can be caught early, as this
   1596    can save the author a lot of debugging time. This specification therefore usually considers it
   1597    an error to use element names, attribute names, and so forth, that do not match the names
   1598    defined in this specification.</p>
   1599 
   1600    <div class="example">
   1601 
   1602     <p>For example, if the author typed <code>&lt;capton></code> instead of
   1603     <code>&lt;caption></code>, this would be flagged as an error and the author could correct the
   1604     typo immediately.</p>
   1605 
   1606    </div>
   1607 
   1608   </dd>
   1609 
   1610 
   1611   <dt>Errors that could interfere with new syntax in the future</dt>
   1612 
   1613   <dd>
   1614 
   1615    <p>In order to allow the language syntax to be extended in the future, certain otherwise
   1616    harmless features are disallowed.</p>
   1617 
   1618    <div class="example">
   1619 
   1620     <p>For example, "attributes" in end tags are ignored currently, but they are invalid, so that
   1621     a future change to the language can make use of that syntax feature without conflicting with
   1622     already-deployed (and valid!) content.</p>
   1623 
   1624    </div>
   1625 
   1626   </dd>
   1627 
   1628 
   1629  </dl>
   1630 
   1631  <p>Some authors find it helpful to be in the practice of always quoting all attributes and always
   1632  including all optional tags, preferring the consistency derived from such custom over the minor
   1633  benefits of terseness afforded by making use of the flexibility of the HTML syntax. To aid such
   1634  authors, conformance checkers can provide modes of operation wherein such conventions are
   1635  enforced.</p>
   1636 
   1637 
   1638 
   1639  <h4>Restrictions on content models and on attribute values</h4>
   1640 
   1641  <!-- NON-NORMATIVE SECTION -->
   1642 
   1643  <p>Beyond the syntax of the language, this specification also places restrictions on how elements
   1644  and attributes can be specified. These restrictions are present for similar reasons:</p>
   1645 
   1646  <dl>
   1647 
   1648 
   1649   <dt>Errors involving content with dubious semantics</dt>
   1650 
   1651   <dd>
   1652 
   1653    <p>To avoid misuse of elements with defined meanings, content models are defined that restrict
   1654    how elements can be nested when such nestings would be of dubious value.</p>
   1655 
   1656    <p class="example">For example, this specification disallows nesting a <code>section</code>
   1657    element inside a <code>kbd</code> element, since it is highly unlikely for an author to indicate
   1658    that an entire section should be keyed in.</p>
   1659 
   1660   </dd>
   1661 
   1662 
   1663   <dt>Errors that involve a conflict in expressed semantics</dt>
   1664 
   1665   <dd>
   1666 
   1667    <p>Similarly, to draw the author's attention to mistakes in the use of elements, clear
   1668    contradictions in the semantics expressed are also considered conformance errors.</p>
   1669 
   1670    <div class="example">
   1671 
   1672     <p>In the fragments below, for example, the semantics are nonsensical: a separator cannot
   1673     simultaneously be a cell, nor can a radio button be a progress bar.</p>
   1674 
   1675     <pre class="bad">&lt;hr role="cell"></pre>
   1676     <pre class="bad">&lt;input type=radio role=progressbar></pre>
   1677 
   1678    </div>
   1679 
   1680    <p class="example">Another example is the restriction on the content model of the
   1681    <code>ul</code> element, which only allows <code>li</code> element children. Lists by definition
   1682    consist just of zero or more list items, so if a <code>ul</code> element contains something
   1683    other than an <code>li</code> element, it's not clear what was meant.</p>
   1684 
   1685   </dd>
   1686 
   1687 
   1688   <dt>Cases where the default styles are likely to lead to confusion</dt>
   1689 
   1690   <dd>
   1691 
   1692    <p>Certain elements have default styles or behaviors that make certain combinations likely to
   1693    lead to confusion. Where these have equivalent alternatives without this problem, the confusing
   1694    combinations are disallowed.</p>
   1695 
   1696    <p class="example">For example, <code>div</code> elements are rendered as block boxes, and
   1697    <code>span</code> elements as inline boxes. Putting a block box in an inline box is
   1698    unnecessarily confusing; since either nesting just <code>div</code> elements, or nesting just
   1699    <code>span</code> elements, or nesting <code>span</code> elements inside <code>div</code>
   1700    elements all serve the same purpose as nesting a <code>div</code> element in a <code>span</code>
   1701    element, but only the latter involves a block box in an inline box, the latter combination is
   1702    disallowed.</p>
   1703 
   1704    <p class="example">Another example would be the way <span>interactive content</span> cannot be
   1705    nested. For example, a <code>button</code> element cannot contain a <code>textarea</code>
   1706    element. This is because the default behavior of such nested interactive elements would be
   1707    highly confusing to users. Instead of nesting these elements, they can be placed side by
   1708    side.</p>
   1709 
   1710   </dd>
   1711 
   1712 
   1713   <dt>Errors that indicate a likely misunderstanding of the specification</dt>
   1714 
   1715   <dd>
   1716 
   1717    <p>Sometimes, something is disallowed because allowing it would likely cause author
   1718    confusion.</p>
   1719 
   1720    <p class="example">For example, setting the <code data-x="attr-fe-disabled">disabled</code>
   1721    attribute to the value "<code data-x="">false</code>" is disallowed, because despite the
   1722    appearance of meaning that the element is enabled, it in fact means that the element is
   1723    <em>disabled</em> (what matters for implementations is the presence of the attribute, not its
   1724    value).</p>
   1725 
   1726   </dd>
   1727 
   1728 
   1729   <dt>Errors involving limits that have been imposed merely to simplify the language</dt>
   1730 
   1731   <dd>
   1732 
   1733    <p>Some conformance errors simplify the language that authors need to learn.</p>
   1734 
   1735    <p class="example">For example, the <code>area</code> element's <code
   1736    data-x="attr-area-shape">shape</code> attribute, despite accepting both <code
   1737    data-x="attr-area-shape-keyword-circ">circ</code> and <code
   1738    data-x="attr-area-shape-keyword-circle">circle</code> values in practice as synonyms, disallows
   1739    the use of the <code data-x="attr-area-shape-keyword-circ">circ</code> value, so as to simplify
   1740    tutorials and other learning aids. There would be no benefit to allowing both, but it would
   1741    cause extra confusion when teaching the language.</p>
   1742 
   1743   </dd>
   1744 
   1745 
   1746   <dt>Errors that involve peculiarities of the parser</dt>
   1747 
   1748   <dd>
   1749 
   1750    <p>Certain elements are parsed in somewhat eccentric ways (typically for historical reasons),
   1751    and their content model restrictions are intended to avoid exposing the author to these
   1752    issues.</p>
   1753 
   1754    <div class="example">
   1755 
   1756     <p>For example, a <code>form</code> element isn't allowed inside <span>phrasing content</span>,
   1757     because when parsed as HTML, a <code>form</code> element's start tag will imply a
   1758     <code>p</code> element's end tag. Thus, the following markup results in two <span
   1759     data-x="paragraph">paragraphs</span>, not one:</p>
   1760 
   1761     <pre>&lt;p>Welcome. &lt;form>&lt;label>Name:&lt;/label> &lt;input>&lt;/form></pre>
   1762 
   1763     <p>It is parsed exactly like the following:</p>
   1764 
   1765     <pre>&lt;p>Welcome. &lt;/p>&lt;form>&lt;label>Name:&lt;/label> &lt;input>&lt;/form></pre>
   1766 
   1767    </div>
   1768 
   1769   </dd>
   1770 
   1771 
   1772   <dt>Errors that would likely result in scripts failing in hard-to-debug ways</dt>
   1773 
   1774   <dd>
   1775 
   1776    <p>Some errors are intended to help prevent script problems that would be hard to debug.</p>
   1777 
   1778    <p class="example">This is why, for instance, it is non-conforming to have two <code
   1779    data-x="attr-id">id</code> attributes with the same value. Duplicate IDs lead to the wrong
   1780    element being selected, with sometimes disastrous effects whose cause is hard to determine.</p>
   1781 
   1782   </dd>
   1783 
   1784 
   1785   <dt>Errors that waste authoring time</dt>
   1786 
   1787   <dd>
   1788 
   1789    <p>Some constructs are disallowed because historically they have been the cause of a lot of
   1790    wasted authoring time, and by encouraging authors to avoid making them, authors can save time in
   1791    future efforts.</p>
   1792 
   1793    <p class="example">For example, a <code>script</code> element's <code
   1794    data-x="attr-script-src">src</code> attribute causes the element's contents to be ignored.
   1795    However, this isn't obvious, especially if the element's contents appear to be executable script
   1796    &mdash; which can lead to authors spending a lot of time trying to debug the inline script
   1797    without realizing that it is not executing. To reduce this problem, this specification makes it
   1798    non-conforming to have executable script in a <code>script</code> element when the <code
   1799    data-x="attr-script-src">src</code> attribute is present. This means that authors who are
   1800    validating their documents are less likely to waste time with this kind of mistake.</p>
   1801 
   1802   </dd>
   1803 
   1804 
   1805   <dt>Errors that involve areas that affect authors migrating to and from XHTML</dt>
   1806 
   1807   <dd>
   1808 
   1809    <p>Some authors like to write files that can be interpreted as both XML and HTML with similar
   1810    results. Though this practice is discouraged in general due to the myriad of subtle
   1811    complications involved (especially when involving scripting, styling, or any kind of automated
   1812    serialisation), this specification has a few restrictions intended to at least somewhat mitigate
   1813    the difficulties. This makes it easier for authors to use this as a transitionary step when
   1814    migrating between HTML and XHTML.</p>
   1815 
   1816    <p class="example">For example, there are somewhat complicated rules surrounding the <code
   1817    data-x="attr-lang">lang</code> and <code data-x="attr-xml-lang">xml:lang</code> attributes
   1818    intended to keep the two synchronized.</p>
   1819 
   1820    <p class="example">Another example would be the restrictions on the values of <code
   1821    data-x="">xmlns</code> attributes in the HTML serialisation, which are intended to ensure that
   1822    elements in conforming documents end up in the same namespaces whether processed as HTML or
   1823    XML.</p>
   1824 
   1825   </dd>
   1826 
   1827 
   1828   <dt>Errors that involve areas reserved for future expansion</dt>
   1829 
   1830   <dd>
   1831 
   1832    <p>As with the restrictions on the syntax intended to allow for new syntax in future revisions
   1833    of the language, some restrictions on the content models of elements and values of attributes
   1834    are intended to allow for future expansion of the HTML vocabulary.</p>
   1835 
   1836    <p class="example">For example, limiting the values of the <code
   1837    data-x="attr-hyperlink-target">target</code> attribute that start with a U+005F LOW LINE
   1838    character (_) to only specific predefined values allows new predefined values to be introduced
   1839    at a future time without conflicting with author-defined values.</p>
   1840 
   1841   </dd>
   1842 
   1843 
   1844   <dt>Errors that indicate a mis-use of other specifications</dt>
   1845 
   1846   <dd>
   1847 
   1848    <p>Certain restrictions are intended to support the restrictions made by other
   1849    specifications.</p>
   1850 
   1851    <p class="example">For example, requiring that attributes that take media queries use only
   1852    <em>valid</em> media queries reinforces the importance of following the conformance rules of
   1853    that specification.</p>
   1854 
   1855   </dd>
   1856 
   1857  </dl>
   1858 
   1859 
   1860 
   1861  <h3>Suggested reading</h3>
   1862 
   1863  <!-- NON-NORMATIVE SECTION -->
   1864 
   1865  <p>The following documents might be of interest to readers of this specification.</p>
   1866 
   1867  <dl>
   1868 
   1869   <dt><cite>Character Model for the World Wide Web 1.0: Fundamentals</cite> <a href="#refsCHARMOD">[CHARMOD]</a></dt>
   1870 
   1871   <dd><blockquote><p>This Architectural Specification provides authors of specifications, software
   1872   developers, and content developers with a common reference for interoperable text manipulation on
   1873   the World Wide Web, building on the Universal Character Set, defined jointly by the Unicode
   1874   Standard and ISO/IEC 10646. Topics addressed include use of the terms 'character', 'encoding' and
   1875   'string', a reference processing model, choice and identification of character encodings,
   1876   character escaping, and string indexing.</p></blockquote></dd>
   1877 
   1878   <dt><cite>Unicode Security Considerations</cite> <a href="#refsUTR36">[UTR36]</a></dt>
   1879 
   1880   <dd><blockquote><p>Because Unicode contains such a large number of characters and incorporates
   1881   the varied writing systems of the world, incorrect usage can expose programs or systems to
   1882   possible security attacks. This is especially important as more and more products are
   1883   internationalized. This document describes some of the security considerations that programmers,
   1884   system analysts, standards developers, and users should take into account, and provides specific
   1885   recommendations to reduce the risk of problems.</p></blockquote></dd>
   1886 
   1887   <dt><cite>Web Content Accessibility Guidelines (WCAG) 2.0</cite> <a href="#refsWCAG">[WCAG]</a></dt>
   1888 
   1889   <dd><blockquote><p>Web Content Accessibility Guidelines (WCAG) 2.0 covers a wide range of
   1890   recommendations for making Web content more accessible. Following these guidelines will make
   1891   content accessible to a wider range of people with disabilities, including blindness and low
   1892   vision, deafness and hearing loss, learning disabilities, cognitive limitations, limited
   1893   movement, speech disabilities, photosensitivity and combinations of these. Following these
   1894   guidelines will also often make your Web content more usable to users in
   1895   general.</p></blockquote></dd>
   1896 
   1897   <dt class="nodev"><cite>Authoring Tool Accessibility Guidelines (ATAG) 2.0</cite> <a href="#refsATAG">[ATAG]</a></dt>
   1898 
   1899   <dd class="nodev"><blockquote><p>This specification provides guidelines for designing Web content
   1900   authoring tools that are more accessible for people with disabilities. An authoring tool that
   1901   conforms to these guidelines will promote accessibility by providing an accessible user interface
   1902   to authors with disabilities as well as by enabling, supporting, and promoting the production of
   1903   accessible Web content by all authors.</p></blockquote></dd>
   1904 
   1905   <dt class="nodev"><cite>User Agent Accessibility Guidelines (UAAG) 2.0</cite> <a href="#refsUAAG">[UAAG]</a></dt>
   1906 
   1907   <dd class="nodev"><blockquote><p>This document provides guidelines for designing user agents that
   1908   lower barriers to Web accessibility for people with disabilities. User agents include browsers
   1909   and other types of software that retrieve and render Web content. A user agent that conforms to
   1910   these guidelines will promote accessibility through its own user interface and through other
   1911   internal facilities, including its ability to communicate with other technologies (especially
   1912   assistive technologies). Furthermore, all users, not just users with disabilities, should find
   1913   conforming user agents to be more usable.</p></blockquote></dd>
   1914 
   1915  </dl>
   1916 
   1917 
   1918 
   1919  <h2 id="infrastructure">Common infrastructure</h2>
   1920 
   1921  <h3>Terminology</h3>
   1922 
   1923  <p>This specification refers to both HTML and XML attributes and IDL attributes, often in the same
   1924  context. When it is not clear which is being referred to, they are referred to as <dfn
   1925  data-x="">content attributes</dfn> for HTML and XML attributes, and <dfn data-x="">IDL
   1926  attributes</dfn> for those defined on IDL interfaces. Similarly, the term "properties" is used for
   1927  both JavaScript object properties and CSS properties. When these are ambiguous they are qualified
   1928  as <dfn data-x="">object properties</dfn> and <dfn data-x="">CSS properties</dfn> respectively.</p>
   1929 
   1930  <p>Generally, when the specification states that a feature applies to <span>the HTML syntax</span>
   1931  or <span>the XHTML syntax</span>, it also includes the other. When a feature specifically only
   1932  applies to one of the two languages, it is called out by explicitly stating that it does not apply
   1933  to the other format, as in "for HTML, ... (this does not apply to XHTML)".</p>
   1934 
   1935  <p>This specification uses the term <dfn data-x="">document</dfn> to refer to any use of HTML,
   1936  ranging from short static documents to long essays or reports with rich multimedia, as well as to
   1937  fully-fledged interactive applications. The term is used to refer both to <code>Document</code>
   1938  objects and their descendant DOM trees, and to serialised byte streams using the <span data-x="the
   1939  HTML syntax">HTML syntax</span> or <span data-x="the XHTML syntax">XHTML syntax</span>, depending
   1940  on context.</p>
   1941 
   1942  <p>In the context of the DOM structures, the terms <span data-x="HTML documents">HTML
   1943  document</span> and <span data-x="XML documents">XML document</span> are used as defined in the DOM
   1944  specification, and refer specifically to two different modes that <code>Document</code> objects
   1945  can find themselves in. <a href="#refsDOM">[DOM]</a> (Such uses are always hyperlinked to their
   1946  definition.)</p>
   1947 
   1948  <p>In the context of byte streams, the term HTML document refers to resources labeled as
   1949  <code>text/html</code>, and the term XML document refers to resources labeled with an <span>XML
   1950  MIME type</span>.</p>
   1951 
   1952  <p>The term <dfn>XHTML document</dfn> is used to refer to both <code>Document</code>s in the <span
   1953  data-x="XML documents">XML document</span> mode that contain element nodes in the <span>HTML
   1954  namespace</span>, and byte streams labeled with an <span>XML MIME type</span> that contain
   1955  elements from the <span>HTML namespace</span>, depending on context.</p>
   1956 
   1957  <hr>
   1958 
   1959  <p>For simplicity, terms such as <dfn data-x="">shown</dfn>, <dfn data-x="">displayed</dfn>, and
   1960  <dfn data-x="">visible</dfn> might sometimes be used when referring to the way a document is
   1961  rendered to the user. These terms are not meant to imply a visual medium; they must be considered
   1962  to apply to other media in equivalent ways.</p>
   1963 
   1964  <div class="nodev">
   1965 
   1966  <p>When an algorithm B says to return to another algorithm A, it implies that A called B. Upon
   1967  returning to A, the implementation must continue from where it left off in calling B.</p>
   1968 
   1969  </div>
   1970 
   1971  <!-- should find somewhere more appropriate to put this -->
   1972  <p>The term "transparent black" refers to the colour with red, green, blue, and alpha channels all
   1973  set to zero.</p>
   1974 
   1975 
   1976  <h4>Resources</h4>
   1977 
   1978  <p>The specification uses the term <dfn data-x="">supported</dfn> when referring to whether a user
   1979  agent has an implementation capable of decoding the semantics of an external resource. A format or
   1980  type is said to be <i>supported</i> if the implementation can process an external resource of that
   1981  format or type without critical aspects of the resource being ignored. Whether a specific resource
   1982  is <i>supported</i> can depend on what features of the resource's format are in use.</p>
   1983 
   1984  <p class="example">For example, a PNG image would be considered to be in a supported format if its
   1985  pixel data could be decoded and rendered, even if, unbeknownst to the implementation, the image
   1986  also contained animation data.</p>
   1987 
   1988  <p class="example">An MPEG-4 video file would not be considered to be in a supported format if the
   1989  compression format used was not supported, even if the implementation could determine the
   1990  dimensions of the movie from the file's metadata.</p>
   1991 
   1992  <p>What some specifications, in particular the HTTP specification, refer to as a
   1993  <i>representation</i> is referred to in this specification as a <dfn data-x="">resource</dfn>. <a
   1994  href="#refsHTTP">[HTTP]</a></p>
   1995 
   1996  <p>The term <dfn>MIME type</dfn> is used to refer to what is sometimes called an <i>Internet media
   1997  type</i> in protocol literature. The term <i>media type</i> in this specification is used to refer
   1998  to the type of media intended for presentation, as used by the CSS specifications. <a
   1999  href="#refsRFC2046">[RFC2046]</a> <a href="#refsMQ">[MQ]</a></p>
   2000 
   2001  <p>A string is a <dfn>valid MIME type</dfn> if it matches the <code data-x="">media-type</code>
   2002  rule defined in section 3.7 "Media Types" of RFC 2616. In particular, a <span>valid MIME
   2003  type</span> may include MIME type parameters. <a href="#refsHTTP">[HTTP]</a></p>
   2004 
   2005  <p>A string is a <dfn>valid MIME type with no parameters</dfn> if it matches the <code
   2006  data-x="">media-type</code> rule defined in section 3.7 "Media Types" of RFC 2616, but does not
   2007  contain any U+003B SEMICOLON characters (;). In other words, if it consists only of a type and
   2008  subtype, with no MIME type parameters. <a href="#refsHTTP">[HTTP]</a></p>
   2009 
   2010  <p>The term <dfn>HTML MIME type</dfn> is used to refer to the <span>MIME type</span>
   2011  <code>text/html</code>.</p>
   2012 
   2013  <p>A resource's <dfn>critical subresources</dfn> are those that the resource needs to have
   2014  available to be correctly processed. Which resources are considered critical or not is defined by
   2015  the specification that defines the resource's format.</p>
   2016 
   2017  <p>The term <dfn data-x="data protocol"><code data-x="">data:</code> URL</dfn> refers to <span
   2018  data-x="URL">URLs</span> that use the <code data-x="">data:</code> scheme. <a
   2019  href="#refsRFC2397">[RFC2397]</a></p>
   2020 
   2021 
   2022  <h4>XML</h4>
   2023 
   2024  <p id="html-namespace">To ease migration from HTML to XHTML, UAs conforming to this specification
   2025  will place elements in HTML in the <code>http://www.w3.org/1999/xhtml</code> namespace, at least
   2026  for the purposes of the DOM and CSS. The term "<dfn>HTML elements</dfn>", when used in this
   2027  specification, refers to any element in that namespace, and thus refers to both HTML and XHTML
   2028  elements.</p>
   2029 
   2030  <p>Except where otherwise stated, all elements defined or mentioned in this specification are in
   2031  the <span>HTML namespace</span> ("<code>http://www.w3.org/1999/xhtml</code>"), and all attributes
   2032  defined or mentioned in this specification have no namespace.</p>
   2033 
   2034  <p>The term <dfn>element type</dfn> is used to refer to the set of elements that have a given
   2035  local name and namespace. For example, <code>button</code> elements are elements with the element
   2036  type <code>button</code>, meaning they have the local name "<code data-x="">button</code>" and
   2037  (implicitly as defined above) the <span>HTML namespace</span>.</p>
   2038 
   2039  <p>Attribute names are said to be <dfn>XML-compatible</dfn> if they match the <a
   2040  href="http://www.w3.org/TR/xml/#NT-Name"><code data-x="">Name</code></a> production defined in XML
   2041  and they contain no U+003A COLON characters (:). <a href="#refsXML">[XML]</a></p>
   2042 
   2043  <p>The term <dfn>XML MIME type</dfn> is used to refer to the <span data-x="MIME type">MIME
   2044  types</span> <code data-x="">text/xml</code>, <code data-x="">application/xml</code>, and any
   2045  <span>MIME type</span> whose subtype ends with the four characters "<code data-x="">+xml</code>".
   2046  <a href="#refsRFC3023">[RFC3023]</a></p>
   2047 
   2048 
   2049  <h4>DOM trees</h4>
   2050 
   2051  <p>The <dfn>root element of a <code>Document</code> object</dfn> is that <code>Document</code>'s
   2052  first element child, if any. If it does not have one then the <code>Document</code> has no root
   2053  element.</p>
   2054 
   2055  <p>The term <dfn>root element</dfn>, when not referring to a <code>Document</code> object's root
   2056  element, means the furthest ancestor element node of whatever node is being discussed, or the node
   2057  itself if it has no ancestors. When the node is a part of the document, then the node's <span>root
   2058  element</span> is indeed the document's root element; however, if the node is not currently part
   2059  of the document tree, the root element will be an orphaned node.</p>
   2060 
   2061  <p>When an element's <span>root element</span> is the <span>root element of a
   2062  <code>Document</code> object</span>, it is said to be <dfn>in a <code>Document</code></dfn>. An
   2063  element is said to have been <dfn data-x="insert an element into a document">inserted into a
   2064  document</dfn> when its <span>root element</span> changes and is now the document's <span>root
   2065  element</span>. Analogously, an element is said to have been <dfn data-x="remove an element from a
   2066  document">removed from a document</dfn> when its <span>root element</span> changes from being the
   2067  document's <span>root element</span> to being another element.</p>
   2068 
   2069  <p>A node's <dfn>home subtree</dfn> is the subtree rooted at that node's <span>root
   2070  element</span>. When a node is <span>in a <code>Document</code></span>, its <span>home
   2071  subtree</span> is that <code>Document</code>'s tree.</p>
   2072 
   2073  <p>The <code>Document</code> of a <code>Node</code> (such as an element) is the
   2074  <code>Document</code> that the <code>Node</code>'s <code
   2075  data-x="dom-Node-ownerDocument">ownerDocument</code> IDL attribute returns. When a
   2076  <code>Node</code> is <span>in a <code>Document</code></span> then that <code>Document</code> is
   2077  always the <code>Node</code>'s <code>Document</code>, and the <code>Node</code>'s <code
   2078  data-x="dom-Node-ownerDocument">ownerDocument</code> IDL attribute thus always returns that
   2079  <code>Document</code>.</p>
   2080 
   2081  <p>The <code>Document</code> of a content attribute is the <code>Document</code> of the
   2082  attribute's element.</p>
   2083 
   2084  <p>The term <dfn>tree order</dfn> means a pre-order, depth-first traversal of DOM nodes involved
   2085  (through the <code data-x="dom-Node-parentNode">parentNode</code>/<code
   2086  data-x="dom-Node-childNodes">childNodes</code> relationship).</p>
   2087 
   2088  <p>When it is stated that some element or attribute is <dfn data-x="ignore">ignored</dfn>, or
   2089  treated as some other value, or handled as if it was something else, this refers only to the
   2090  processing of the node after it is in the DOM. <span class="nodev">A user agent must not mutate the
   2091  DOM in such situations.</span></p>
   2092 
   2093  <p>A content attribute is said to <dfn data-x="">change</dfn> value only if its new value is
   2094  different from its previous value; setting an attribute to a value it already has does not change
   2095  it.</p>
   2096 
   2097  <p>The term <dfn data-x="">empty</dfn>, when used of an attribute value, <code>Text</code> node, or
   2098  string, means that the length of the text is zero (i.e. not even containing spaces or <span>control
   2099  characters</span>).</p>
   2100 
   2101 
   2102  <h4>Scripting</h4>
   2103 
   2104  <p>The construction "a <code>Foo</code> object", where <code>Foo</code> is actually an interface,
   2105  is sometimes used instead of the more accurate "an object implementing the interface
   2106  <code>Foo</code>".</p>
   2107 
   2108  <p>An IDL attribute is said to be <dfn data-x="">getting</dfn> when its value is being retrieved
   2109  (e.g. by author script), and is said to be <dfn data-x="">setting</dfn> when a new value is
   2110  assigned to it.</p>
   2111 
   2112  <p>If a DOM object is said to be <dfn>live</dfn>, then the attributes and methods on that object
   2113  <span class="nodev">must</span> operate on the actual underlying data, not a snapshot of the
   2114  data.</p>
   2115 
   2116  <p>In the context of events, the terms <i>fire</i> and <i>dispatch</i> are used as defined in the
   2117  DOM specification: <dfn data-x="concept-event-fire">firing</dfn> an event means to create and <span
   2118  data-x="concept-event-dispatch">dispatch</span> it, and <dfn
   2119  data-x="concept-event-dispatch">dispatching</dfn> an event means to follow the steps that propagate
   2120  the event through the tree. The term <dfn data-x="concept-events-trusted">trusted event</dfn> is
   2121  used to refer to events whose <code data-x="dom-event-isTrusted">isTrusted</code> attribute is
   2122  initialised to true. <a href="#refsDOM">[DOM]</a></p>
   2123 
   2124 
   2125  <h4>Plugins</h4>
   2126 
   2127  <p>The term <dfn>plugin</dfn> refers to a user-agent defined set of content handlers used by the
   2128  user agent that can take part in the user agent's rendering of a <code>Document</code> object, but
   2129  that neither act as <span data-x="child browsing context">child browsing contexts</span> of the
   2130  <code>Document</code> nor introduce any <code>Node</code> objects to the <code>Document</code>'s
   2131  DOM.</p>
   2132 
   2133  <p>Typically such content handlers are provided by third parties, though a user agent can also
   2134  designate built-in content handlers as plugins.</p>
   2135 
   2136  <div class="nodev">
   2137 
   2138  <p>A user agent must not consider the types <code>text/plain</code> and
   2139  <code>application/octet-stream</code> as having a registered <span>plugin</span>.</p> <!-- because
   2140  of the way <object> elements handle those types, if nothing else (it also doesn't make any sense
   2141  to have a plugin registered for those types, of course) -->
   2142 
   2143  </div>
   2144 
   2145  <p class="example">One example of a plugin would be a PDF viewer that is instantiated in a
   2146  <span>browsing context</span> when the user navigates to a PDF file. This would count as a plugin
   2147  regardless of whether the party that implemented the PDF viewer component was the same as that
   2148  which implemented the user agent itself. However, a PDF viewer application that launches separate
   2149  from the user agent (as opposed to using the same interface) is not a plugin by this
   2150  definition.</p>
   2151 
   2152  <p class="note">This specification does not define a mechanism for interacting with plugins, as it
   2153  is expected to be user-agent- and platform-specific. Some UAs might opt to support a plugin
   2154  mechanism such as the Netscape Plugin API; others might use remote content converters or have
   2155  built-in support for certain types. Indeed, this specification doesn't require user agents to
   2156  support plugins at all. <a href="#refsNPAPI">[NPAPI]</a></p>
   2157 
   2158  <p>A plugin can be <dfn data-x="concept-plugin-secure">secured</dfn> if it honors the semantics of
   2159  the <code data-x="attr-iframe-sandbox">sandbox</code> attribute.</p>
   2160 
   2161  <p class="example">For example, a secured plugin would prevent its contents from creating pop-up
   2162  windows when the plugin is instantiated inside a sandboxed <code>iframe</code>.</p>
   2163 
   2164  <div class="nodev">
   2165 
   2166  <p class="warning">Browsers should take extreme care when interacting with external content
   2167  intended for <span data-x="plugin">plugins</span>. When third-party software is run with the same
   2168  privileges as the user agent itself, vulnerabilities in the third-party software become as
   2169  dangerous as those in the user agent.</p>
   2170 
   2171  <p>Since different users having different sets of <span data-x="plugin">plugins</span> provides a
   2172  fingerprinting vector that increases the chances of users being uniquely identified, user agents
   2173  are encouraged to support the exact same set of <span data-x="plugin">plugins</span> for each
   2174  user.
   2175  <!--INSERT FINGERPRINT-->
   2176  </p>
   2177 
   2178  </div>
   2179 
   2180 
   2181 
   2182  <h4 id="encoding-terminology">Character encodings</h4>
   2183 
   2184  <p>A <dfn data-x="encoding">character encoding</dfn>, or just <i>encoding</i> where that is not
   2185  ambiguous, is a defined way to convert between byte streams and Unicode strings, as defined in the
   2186  WHATWG Encoding standard. An <span>encoding</span> has an <dfn>encoding name</dfn> and one or more
   2187  <dfn data-x="encoding label">encoding labels</dfn>, referred to as the encoding's <i>name</i> and
   2188  <i>labels</i> in the Encoding standard. <a href="#refsENCODING">[ENCODING]</a></p>
   2189 
   2190  <p>An <dfn>ASCII-compatible character encoding</dfn> is a single-byte or variable-length
   2191  <span>encoding</span> in which the bytes 0x09, 0x0A, 0x0C, 0x0D, 0x20 - 0x22, 0x26, 0x27, 0x2C -
   2192  0x3F, 0x41 - 0x5A, and 0x61 - 0x7A<!-- is that list ok? do any character sets we want to support
   2193  do things outside that range? -->, ignoring bytes that are the second and later bytes of multibyte
   2194  sequences, all correspond to single-byte sequences that map to the same Unicode characters as
   2195  those bytes in Windows-1252<!--ANSI_X3.4-1968 (US-ASCII)-->. <a href="#refsENCODING">[ENCODING]</a></p>
   2196 
   2197  <p class="note">This includes such encodings as Shift_JIS, HZ-GB-2312, and variants of ISO-2022,
   2198  even though it is possible in these encodings for bytes like 0x70 to be part of longer sequences
   2199  that are unrelated to their interpretation as ASCII. It excludes UTF-16 variants, as well as
   2200  obsolete legacy encodings such as UTF-7, GSM03.38, and EBCDIC variants.</p>
   2201 
   2202  <!--
   2203   We'll have to change that if anyone comes up with a way to have a document that is valid as two
   2204   different encodings at once, with different <meta charset> elements applying in each case.
   2205  -->
   2206 
   2207  <p>The term <dfn>a UTF-16 encoding</dfn> refers to any variant of UTF-16: UTF-16LE or UTF-16BE,
   2208  regardless of the presence or absence of a BOM. <a href="#refsENCODING">[ENCODING]</a></p>
   2209 
   2210  <p>The term <dfn>code unit</dfn> is used as defined in the Web IDL specification: a 16 bit
   2211  unsigned integer, the smallest atomic component of a <code>DOMString</code>. (This is a narrower
   2212  definition than the one used in Unicode, and is not the same as a <i>code point</i>.) <a
   2213  href="#refsWEBIDL">[WEBIDL]</a></p>
   2214 
   2215  <p>The term <dfn>Unicode code point</dfn> means a <i data-x="">Unicode scalar value</i> where
   2216  possible, and an isolated surrogate code point when not. When a conformance requirement is defined
   2217  in terms of characters or Unicode code points, a pair of <span data-x="code unit">code units</span>
   2218  consisting of a high surrogate followed by a low surrogate must be treated as the single code
   2219  point represented by the surrogate pair, but isolated surrogates must each be treated as the
   2220  single code point with the value of the surrogate. <a href="#refsUNICODE">[UNICODE]</a></p>
   2221 
   2222  <p>In this specification, the term <dfn>character</dfn>, when not qualified as <em>Unicode</em>
   2223  character, is synonymous with the term <span>Unicode code point</span>.</p>
   2224 
   2225  <p>The term <dfn>Unicode character</dfn> is used to mean a <i data-x="">Unicode scalar value</i>
   2226  (i.e. any Unicode code point that is not a surrogate code point). <a
   2227  href="#refsUNICODE">[UNICODE]</a></p>
   2228 
   2229  <p>The <dfn>code-unit length</dfn> of a string is the number of <span data-x="code unit">code
   2230  units</span> in that string.</p>
   2231 
   2232  <p class="note">This complexity results from the historical decision to define the DOM API in
   2233  terms of 16 bit (UTF-16) <span data-x="code unit">code units</span>, rather than in terms of <span
   2234  data-x="Unicode character">Unicode characters</span>.</p>
   2235 
   2236 
   2237 
   2238  <div class="nodev">
   2239 
   2240  <h3>Conformance requirements</h3>
   2241 
   2242  <p>All diagrams, examples, and notes in this specification are non-normative, as are all sections
   2243  explicitly marked non-normative. Everything else in this specification is normative.</p>
   2244 
   2245  <p>The key words "MUST", "MUST NOT", <!--"REQUIRED",--> <!--"SHALL", "SHALL NOT",--> "SHOULD", "SHOULD
   2246  NOT", <!--"RECOMMENDED", "NOT RECOMMENDED",--> "MAY", and "OPTIONAL" in the normative parts of
   2247  this document are to be interpreted as described in RFC2119. The key word "OPTIONALLY" in the
   2248  normative parts of this document is to be interpreted with the same normative meaning as "MAY" and
   2249  "OPTIONAL". For readability, these words do not appear in all uppercase letters in this
   2250  specification. <a href="#refsRFC2119">[RFC2119]</a></p>
   2251 
   2252  <p>Requirements phrased in the imperative as part of algorithms (such as "strip any leading space
   2253  characters" or "return false and abort these steps") are to be interpreted with the meaning of the
   2254  key word ("must", "should", "may", etc) used in introducing the algorithm.</p>
   2255 
   2256  <div class="example">
   2257 
   2258   <p>For example, were the spec to say:</p>
   2259 
   2260   <pre>To eat an orange, the user must:
   2261 1. Peel the orange.
   2262 2. Separate each slice of the orange.
   2263 3. Eat the orange slices.</pre>
   2264 
   2265   <p>...it would be equivalent to the following:</p>
   2266 
   2267   <pre>To eat an orange:
   2268 1. The user must peel the orange.
   2269 2. The user must separate each slice of the orange.
   2270 3. The user must eat the orange slices.</pre>
   2271 
   2272   <p>Here the key word is "must".</p>
   2273 
   2274   <p>The former (imperative) style is generally preferred in this specification for stylistic
   2275   reasons.</p>
   2276 
   2277  </div>
   2278 
   2279  <p>Conformance requirements phrased as algorithms or specific steps may be implemented in any
   2280  manner, so long as the end result is equivalent. (In particular, the algorithms defined in this
   2281  specification are intended to be easy to follow, and not intended to be performant.)</p>
   2282 
   2283  </div>
   2284 
   2285 
   2286 
   2287  <div class="nodev">
   2288 
   2289  <h4>Conformance classes</h4>
   2290 
   2291  <p>This specification describes the conformance criteria for <span class="nodev">user agents
   2292  (relevant to implementors) and</span> documents<span class="nodev"> (relevant to authors and
   2293  authoring tool implementors)</span>.</p>
   2294 
   2295  <p><dfn>Conforming documents</dfn> are those that comply with all the conformance criteria for
   2296  documents. For readability, some of these conformance requirements are phrased as conformance
   2297  requirements on authors; such requirements are implicitly requirements on documents: by
   2298  definition, all documents are assumed to have had an author. (In some cases, that author may
   2299  itself be a user agent &mdash; such user agents are subject to additional rules, as explained
   2300  below.)</p>
   2301 
   2302  <p class="example">For example, if a requirement states that "authors must not use the <code
   2303  data-x="">foobar</code> element", it would imply that documents are not allowed to contain elements
   2304  named <code data-x="">foobar</code>.</p>
   2305 
   2306  <p class="note impl">There is no implied relationship between document conformance requirements
   2307  and implementation conformance requirements. User agents are not free to handle non-conformant
   2308  documents as they please; the processing model described in this specification applies to
   2309  implementations regardless of the conformity of the input documents.</p>
   2310 
   2311  <p>User agents fall into several (overlapping) categories with different conformance
   2312  requirements.</p>
   2313 
   2314  <dl>
   2315 
   2316   <dt id="interactive">Web browsers and other interactive user agents</dt>
   2317 
   2318   <dd>
   2319 
   2320    <p>Web browsers that support <span>the XHTML syntax</span> must process elements and attributes
   2321    from the <span>HTML namespace</span> found in XML documents as described in this specification,
   2322    so that users can interact with them, unless the semantics of those elements have been
   2323    overridden by other specifications.</p>
   2324 
   2325    <p class="example">A conforming XHTML processor would, upon finding an XHTML <code>script</code>
   2326    element in an XML document, execute the script contained in that element. However, if the
   2327    element is found within a transformation expressed in XSLT (assuming the user agent also
   2328    supports XSLT), then the processor would instead treat the <code>script</code> element as an
   2329    opaque element that forms part of the transform.</p>
   2330 
   2331    <p>Web browsers that support <span>the HTML syntax</span> must process documents labeled with an
   2332    <span>HTML MIME type</span> as described in this specification, so that users can interact with
   2333    them.</p>
   2334 
   2335    <p>User agents that support scripting must also be conforming implementations of the IDL
   2336    fragments in this specification, as described in the Web IDL specification. <a
   2337    href="#refsWEBIDL">[WEBIDL]</a></p>
   2338 
   2339    <p class="note">Unless explicitly stated, specifications that override the semantics of HTML
   2340    elements do not override the requirements on DOM objects representing those elements. For
   2341    example, the <code>script</code> element in the example above would still implement the
   2342    <code>HTMLScriptElement</code> interface.</p>
   2343 
   2344   </dd>
   2345 
   2346   <dt id="non-interactive">Non-interactive presentation user agents</dt>
   2347 
   2348   <dd>
   2349 
   2350    <p>User agents that process HTML and XHTML documents purely to render non-interactive versions
   2351    of them must comply with the same conformance criteria as Web browsers, except that they are
   2352    exempt from requirements regarding user interaction.</p>
   2353 
   2354    <p class="note">Typical examples of non-interactive presentation user agents are printers
   2355    (static UAs) and overhead displays (dynamic UAs). It is expected that most static
   2356    non-interactive presentation user agents will also opt to <a href="#non-scripted">lack scripting
   2357    support</a>.</p>
   2358 
   2359    <p class="example">A non-interactive but dynamic presentation UA would still execute scripts,
   2360    allowing forms to be dynamically submitted, and so forth. However, since the concept of "focus"
   2361    is irrelevant when the user cannot interact with the document, the UA would not need to support
   2362    any of the focus-related DOM APIs.</p>
   2363 
   2364   </dd>
   2365 
   2366   <dt id="renderingUA">Visual user agents that support the suggested default rendering</dt>
   2367 
   2368   <dd>
   2369 
   2370    <p>User agents, whether interactive or not, may be designated (possibly as a user option) as
   2371    supporting the suggested default rendering defined by this specification.</p>
   2372 
   2373    <p>This is not required. In particular, even user agents that do implement the suggested default
   2374    rendering are encouraged to offer settings that override this default to improve the experience
   2375    for the user, e.g. changing the colour contrast, using different focus styles, or otherwise
   2376    making the experience more accessible and usable to the user.</p>
   2377 
   2378    <p>User agents that are designated as supporting the suggested default rendering must, while so
   2379    designated, implement the rules in <a href="#rendering">the rendering section</a> that that
   2380    section defines as the behavior that user agents are <em>expected</em> to implement.</p>
   2381 
   2382   </dd>
   2383 
   2384   <dt id="non-scripted">User agents with no scripting support</dt>
   2385 
   2386   <dd>
   2387 
   2388    <p>Implementations that do not support scripting (or which have their scripting features
   2389    disabled entirely) are exempt from supporting the events and DOM interfaces mentioned in this
   2390    specification. For the parts of this specification that are defined in terms of an events model
   2391    or in terms of the DOM, such user agents must still act as if events and the DOM were
   2392    supported.</p>
   2393 
   2394    <p class="note">Scripting can form an integral part of an application. Web browsers that do not
   2395    support scripting, or that have scripting disabled, might be unable to fully convey the author's
   2396    intent.</p>
   2397 
   2398   </dd>
   2399 
   2400 
   2401   <dt>Conformance checkers</dt>
   2402 
   2403   <dd id="conformance-checkers">
   2404 
   2405    <p>Conformance checkers must verify that a document conforms to the applicable conformance
   2406    criteria described in this specification. Automated conformance checkers are exempt from
   2407    detecting errors that require interpretation of the author's intent (for example, while a
   2408    document is non-conforming if the content of a <code>blockquote</code> element is not a quote,
   2409    conformance checkers running without the input of human judgement do not have to check that
   2410    <code>blockquote</code> elements only contain quoted material).</p>
   2411 
   2412    <p>Conformance checkers must check that the input document conforms when parsed without a
   2413    <span>browsing context</span> (meaning that no scripts are run, and that the parser's
   2414    <span>scripting flag</span> is disabled), and should also check that the input document conforms
   2415    when parsed with a <span>browsing context</span> in which scripts execute, and that the scripts
   2416    never cause non-conforming states to occur other than transiently during script execution
   2417    itself. (This is only a "SHOULD" and not a "MUST" requirement because it has been proven to be
   2418    impossible. <a href="#refsCOMPUTABLE">[COMPUTABLE]</a>)</p>
   2419 
   2420    <p>The term "HTML validator" can be used to refer to a conformance checker that itself conforms
   2421    to the applicable requirements of this specification.</p>
   2422 
   2423    <div class="note">
   2424 
   2425     <p>XML DTDs cannot express all the conformance requirements of this specification. Therefore, a
   2426     validating XML processor and a DTD cannot constitute a conformance checker. Also, since neither
   2427     of the two authoring formats defined in this specification is an application of SGML, a
   2428     validating SGML system cannot constitute a conformance checker either.</p>
   2429 
   2430     <p>To put it another way, there are three types of conformance criteria:</p>
   2431 
   2432     <ol>
   2433 
   2434      <li>Criteria that can be expressed in a DTD.</li>
   2435 
   2436      <li>Criteria that cannot be expressed by a DTD, but can still be checked by a machine.</li>
   2437 
   2438      <li>Criteria that can only be checked by a human.</li>
   2439 
   2440     </ol>
   2441 
   2442     <p>A conformance checker must check for the first two. A simple DTD-based validator only checks
   2443     for the first class of errors and is therefore not a conforming conformance checker according
   2444     to this specification.</p>
   2445 
   2446    </div>
   2447   </dd>
   2448 
   2449 
   2450   <dt>Data mining tools</dt>
   2451 
   2452   <dd id="data-mining">
   2453 
   2454    <p>Applications and tools that process HTML and XHTML documents for reasons other than to either
   2455    render the documents or check them for conformance should act in accordance with the semantics
   2456    of the documents that they process.</p>
   2457 
   2458    <p class="example">A tool that generates <span data-x="outline">document outlines</span> but
   2459    increases the nesting level for each paragraph and does not increase the nesting level for each
   2460    section would not be conforming.</p>
   2461 
   2462   </dd>
   2463 
   2464 
   2465   <dt id="editors">Authoring tools and markup generators</dt>
   2466 
   2467   <dd>
   2468 
   2469    <p>Authoring tools and markup generators must generate <span>conforming documents</span>.
   2470    Conformance criteria that apply to authors also apply to authoring tools, where appropriate.</p>
   2471 
   2472    <p>Authoring tools are exempt from the strict requirements of using elements only for their
   2473    specified purpose, but only to the extent that authoring tools are not yet able to determine
   2474    author intent. However, authoring tools must not automatically misuse elements or encourage
   2475    their users to do so.</p>
   2476 
   2477    <p class="example">For example, it is not conforming to use an <code>address</code> element for
   2478    arbitrary contact information; that element can only be used for marking up contact information
   2479    for the author of the document or section. However, since an authoring tool is likely unable to
   2480    determine the difference, an authoring tool is exempt from that requirement. This does not mean,
   2481    though, that authoring tools can use <code>address</code> elements for any block of italics text
   2482    (for instance); it just means that the authoring tool doesn't have to verify that when the user
   2483    uses a tool for inserting contact information for a section, the user really is doing that
   2484    and not inserting something else instead.</p>
   2485 
   2486    <p class="note">In terms of conformance checking, an editor has to output documents that conform
   2487    to the same extent that a conformance checker will verify.</p>
   2488 
   2489    <p>When an authoring tool is used to edit a non-conforming document, it may preserve the
   2490    conformance errors in sections of the document that were not edited during the editing session
   2491    (i.e. an editing tool is allowed to round-trip erroneous content). However, an authoring tool
   2492    must not claim that the output is conformant if errors have been so preserved.</p>
   2493 
   2494    <p>Authoring tools are expected to come in two broad varieties: tools that work from structure
   2495    or semantic data, and tools that work on a What-You-See-Is-What-You-Get media-specific editing
   2496    basis (WYSIWYG).</p>
   2497 
   2498    <p>The former is the preferred mechanism for tools that author HTML, since the structure in the
   2499    source information can be used to make informed choices regarding which HTML elements and
   2500    attributes are most appropriate.</p>
   2501 
   2502    <p>However, WYSIWYG tools are legitimate. WYSIWYG tools should use elements they know are
   2503    appropriate, and should not use elements that they do not know to be appropriate. This might in
   2504    certain extreme cases mean limiting the use of flow elements to just a few elements, like
   2505    <code>div</code>, <code>b</code>, <code>i</code>, and <code>span</code> and making liberal use
   2506    of the <code data-x="attr-style">style</code> attribute.</p>
   2507 
   2508    <p>All authoring tools, whether WYSIWYG or not, should make a best effort attempt at enabling
   2509    users to create well-structured, semantically rich, media-independent content.</p>
   2510 
   2511   </dd>
   2512 
   2513  </dl>
   2514 
   2515  <p id="hardwareLimitations">User agents may impose implementation-specific limits on otherwise
   2516  unconstrained inputs, e.g. to prevent denial of service attacks, to guard against running out of
   2517  memory, or to work around platform-specific limitations.
   2518  <!--INSERT FINGERPRINT-->
   2519  </p>
   2520 
   2521  <p>For compatibility with existing content and prior specifications, this specification describes
   2522  two authoring formats: one based on XML (referred to as <span>the XHTML syntax</span>), and one
   2523  using a <a href="#writing">custom format</a> inspired by SGML (referred to as <span>the HTML
   2524  syntax</span>). Implementations must support at least one of these two formats, although
   2525  supporting both is encouraged.</p>
   2526 
   2527  <p>Some conformance requirements are phrased as requirements on elements, attributes, methods or
   2528  objects. Such requirements fall into two categories: those describing content model restrictions,
   2529  and those describing implementation behavior. Those in the former category are requirements on
   2530  documents and authoring tools. Those in the latter category are requirements on user agents.
   2531  Similarly, some conformance requirements are phrased as requirements on authors; such requirements
   2532  are to be interpreted as conformance requirements on the documents that authors produce. (In other
   2533  words, this specification does not distinguish between conformance criteria on authors and
   2534  conformance criteria on documents.)</p>
   2535 
   2536  </div>
   2537 
   2538 
   2539  <div class="nodev">
   2540 
   2541  <h4>Dependencies</h4>
   2542 
   2543  <p>This specification relies on several other underlying specifications.</p>
   2544 
   2545  <dl>
   2546 
   2547   <dt>Unicode and Encoding</dt>
   2548 
   2549   <dd>
   2550 
   2551    <p>The Unicode character set is used to represent textual data, and the WHATWG Encoding standard
   2552    defines requirements around <span data-x="encoding">character encodings</span>. <a
   2553    href="#refsUNICODE">[UNICODE]</a></p>
   2554 
   2555    <p class="note">This specification <a href="#encoding-terminology">introduces terminology</a>
   2556    based on the terms defined in those specifications, as described earlier.</p>
   2557 
   2558    <p>The following terms are used as defined in the WHATWG Encoding standard: <a
   2559    href="#refsENCODING">[ENCODING]</a></p>
   2560 
   2561    <ul class="brief">
   2562 
   2563     <li><dfn>Getting an encoding</dfn>
   2564 
   2565     <li>The <dfn>encoder</dfn> and <dfn>decoder</dfn> algorithms for various encodings, including
   2566     the <dfn>UTF-8 encoder</dfn> and <dfn>UTF-8 decoder</dfn>
   2567 
   2568     <li>The generic <dfn>decode</dfn> algorithm which takes a byte stream and an encoding and
   2569     returns a character stream
   2570 
   2571     <li>The <dfn>UTF-8 decode</dfn> algorithm which takes a byte stream and returns a character
   2572     stream, additionally stripping one leading UTF-8 Byte Order Mark (BOM), if any
   2573 
   2574    </ul>
   2575 
   2576    <p class="note">The <span>UTF-8 decoder</span> is distinct from the <i>UTF-8 decode
   2577    algorithm</i>. The latter first strips a Byte Order Mark (BOM), if any, and then invokes the
   2578    former.</p>
   2579 
   2580    <p>For readability, character encodings are sometimes referenced in this specification with a
   2581    case that differs from the canonical case given in the WHATWG Encoding standard. (For example,
   2582    "UTF-16LE" instead of "utf-16le".)</p>
   2583 
   2584   </dd>
   2585 
   2586 
   2587   <dt>XML</dt>
   2588 
   2589   <dd>
   2590 
   2591    <p>Implementations that support <span>the XHTML syntax</span> must support some version of XML,
   2592    as well as its corresponding namespaces specification, because that syntax uses an XML
   2593    serialisation with namespaces. <a href="#refsXML">[XML]</a> <a href="#refsXMLNS">[XMLNS]</a></p>
   2594 
   2595   </dd>
   2596 
   2597 
   2598   <dt>URLs</dt>
   2599 
   2600   <dd>
   2601 
   2602    <p>The following terms are defined in the WHATWG URL standard: <a href="#refsURL">[URL]</a></p>
   2603 
   2604    <ul class="brief">
   2605     <li><dfn>URL</dfn>
   2606     <li><dfn>Absolute URL</dfn>
   2607     <li><dfn>Relative URL</dfn>
   2608     <li><dfn data-x="concept-url-scheme-relative">Relative schemes</dfn>
   2609     <li>The <dfn>URL parser</dfn>
   2610     <li><dfn>Parsed URL</dfn>
   2611     <li>The <dfn data-x="concept-url-scheme">scheme</dfn> component of a <span>parsed URL</span>
   2612     <li>The <dfn data-x="concept-url-scheme-data">scheme data</dfn> component of a <span>parsed URL</span>
   2613     <li>The <dfn data-x="concept-url-username">username</dfn> component of a <span>parsed URL</span>
   2614     <li>The <dfn data-x="concept-url-password">password</dfn> component of a <span>parsed URL</span>
   2615     <li>The <dfn data-x="concept-url-host">host</dfn> component of a <span>parsed URL</span>
   2616     <li>The <dfn data-x="concept-url-port">port</dfn> component of a <span>parsed URL</span>
   2617     <li>The <dfn data-x="concept-url-path">path</dfn> component of a <span>parsed URL</span>
   2618     <li>The <dfn data-x="concept-url-query">query</dfn> component of a <span>parsed URL</span>
   2619     <li>The <dfn data-x="concept-url-fragment">fragment</dfn> component of a <span>parsed URL</span>
   2620     <li><dfn data-x="concept-url-parse-error">Parse errors</dfn> from the <span>URL parser</span>
   2621     <li>The <dfn data-x="concept-url-serializer">URL serializer</dfn>
   2622     <li><dfn>Default encode set</dfn>
   2623     <li><dfn>Percent encode</dfn>
   2624     <li><dfn>UTF-8 percent encode</dfn>
   2625     <li><dfn>Percent decode</dfn>
   2626     <li><dfn>Decoder error</dfn>
   2627     <li>The <dfn>domain label to ASCII</dfn> algorithm</li>
   2628     <li>The <dfn>domain label to Unicode</dfn> algorithm</li>
   2629     <li><dfn><code>URLUtils</code></dfn> interface
   2630     <li><dfn><code>URLUtilsReadOnly</code></dfn> interface
   2631     <li><dfn data-x="dom-url-href"><code>href</code> attribute</dfn>
   2632     <li><dfn data-x="dom-url-protocol"><code>protocol</code> attribute</dfn>
   2633     <li>The <dfn data-x="concept-uu-get-the-base">get the base</dfn> hook for <code>URLUtils</code>
   2634     <li>The <dfn data-x="concept-uu-update">update steps</dfn> hook for <code>URLUtils</code>
   2635     <li>The <dfn data-x="concept-uu-set-the-input">set the input</dfn> algorithm for <code>URLUtils</code>
   2636     <li>The <dfn data-x="concept-uu-query-encoding">query encoding</dfn> of an <code>URLUtils</code> object
   2637     <li>The <dfn data-x="concept-uu-input">input</dfn> of an <code>URLUtils</code> object
   2638     <li>The <dfn data-x="concept-uu-url">url</dfn> of an <code>URLUtils</code> object
   2639    </ul>
   2640 
   2641   </dd>
   2642 
   2643 
   2644   <dt>Cookies</dt>
   2645 
   2646   <dd>
   2647 
   2648    <p>The following terms are defined in the Cookie specification: <a
   2649    href="#refsCOOKIES">[COOKIES]</a></p>
   2650 
   2651    <ul class="brief">
   2652     <li><dfn>cookie-string</dfn>
   2653     <li><dfn>receives a set-cookie-string</dfn>
   2654    </ul>
   2655 
   2656   </dd>
   2657 
   2658 
   2659   <dt>Fetch</dt>
   2660 
   2661   <dd>
   2662 
   2663    <p>The following terms are defined in the WHATWG Fetch specification: <a href="#refsFETCH">[FETCH]</a></p>
   2664 
   2665    <ul class="brief">
   2666     <li><dfn>cross-origin request</dfn>
   2667     <li><dfn>cross-origin request status</dfn>
   2668     <li><dfn>custom request headers</dfn>
   2669     <li><dfn>simple cross-origin request</dfn>
   2670     <li><dfn>redirect steps</dfn>
   2671     <li><dfn>omit credentials flag</dfn>
   2672     <li><dfn>resource sharing check</dfn>
   2673    </ul>
   2674 
   2675    <p class="note">This specification does not yet use the "fetch" algorithm from the WHATWG Fetch
   2676    specification. It will be updated to do so in due course.</p>
   2677 
   2678   </dd>
   2679 
   2680 
   2681 <!--TOPIC:DOM APIs-->
   2682 
   2683   <dt>Web IDL</dt>
   2684 
   2685   <dd>
   2686 
   2687    <p>The IDL fragments in this specification must be interpreted as required for conforming IDL
   2688    fragments, as described in the Web IDL specification. <a href="#refsWEBIDL">[WEBIDL]</a></p>
   2689 
   2690    <p>The terms <dfn>supported property indices</dfn>, <dfn>determine the value of an indexed
   2691    property</dfn>, <dfn>support named properties</dfn>, <dfn>supported property names</dfn>,
   2692    <dfn>unenumerable</dfn>, <dfn>determine the value of a named property</dfn>, <dfn>platform array
   2693    objects</dfn>, and <dfn data-x="dfn-read-only-array">read only</dfn> (when applied to arrays)
   2694    are used as defined in the Web IDL specification. The algorithm to <dfn>convert a DOMString to a
   2695    sequence of Unicode characters</dfn> is similarly that defined in the Web IDL specification.</p>
   2696 
   2697    <p>When this specification requires a user agent to <dfn>create a <code>Date</code> object</dfn>
   2698    representing a particular time (which could be the special value Not-a-Number), the milliseconds
   2699    component of that time, if any, must be truncated to an integer, and the time value of the newly
   2700    created <code>Date</code> object must represent the resulting truncated time.</p>
   2701 
   2702    <p class="example">For instance, given the time 23045 millionths of a second after 00:00 UTC on
   2703    January 1st 2000, i.e. the time 2000-01-01T00:00:00.023045Z, then the <code>Date</code> object
   2704    created representing that time would represent the same time as that created representing the
   2705    time 2000-01-01T00:00:00.023Z, 45 millionths earlier. If the given time is NaN, then the result
   2706    is a <code>Date</code> object that represents a time value NaN (indicating that the object does
   2707    not represent a specific instant of time).</p>
   2708 
   2709   </dd>
   2710 
   2711 
   2712   <dt>JavaScript</dt>
   2713 
   2714   <dd>
   2715 
   2716    <p>Some parts of the language described by this specification only support JavaScript as the
   2717    underlying scripting language. <a href="#refsECMA262">[ECMA262]</a></p>
   2718 
   2719    <p class="note">The term "JavaScript" is used to refer to ECMA262, rather than the official term
   2720    ECMAScript, since the term JavaScript is more widely known. Similarly, the <span>MIME
   2721    type</span> used to refer to JavaScript in this specification is <code
   2722    data-x="">text/javascript</code>, since that is the most commonly used type, <span data-x="willful
   2723    violation">despite it being an officially obsoleted type</span> according to RFC 4329. <a
   2724    href="#refsRFC4329">[RFC4329]</a></p>
   2725 
   2726    <p>The term <dfn>JavaScript global environment</dfn> refers to the <i data-x="">global
   2727    environment</i> concept defined in the ECMAScript specification.</p>
   2728 
   2729    <p>The ECMAScript <dfn data-x="js-SyntaxError"><code>SyntaxError</code></dfn> exception is also
   2730    defined in the ECMAScript specification. <a href="#refsECMA262">[ECMA262]</a></p>
   2731 
   2732    <p>The <dfn>ArrayBuffer</dfn> and related object types and underlying concepts from the
   2733    ECMAScript Specification are used for several features in this specification. <a
   2734    href="#refsECMA262">[ECMA262]</a></p>
   2735 
   2736    <p>The following helper IDL is used for referring to <code>ArrayBuffer</code>-related types:</p>
   2737 
   2738    <pre class="idl">typedef (<dfn>Int8Array</dfn> or <dfn>Uint8Array</dfn> or <dfn>Uint8ClampedArray</dfn> or
   2739         <dfn>Int16Array</dfn> or <dfn>Uint16Array</dfn> or
   2740         <dfn>Int32Array</dfn> or <dfn>Uint32Array</dfn> or
   2741         <dfn>Float32Array</dfn> or <dfn>Float64Array</dfn> or
   2742         <dfn>DataView</dfn>) <dfn>ArrayBufferView</dfn>;</pre>
   2743 
   2744    <p class="note">In particular, the <code>Uint8ClampedArray</code> type is used by some <span
   2745    data-x="ImageData">2D canvas APIs</span>, and the <a href="#network"><code>WebSocket</code>
   2746    API</a> uses <code>ArrayBuffer</code> objects for handling binary frames.</p>
   2747 
   2748   </dd>
   2749 
   2750 
   2751   <dt>DOM</dt>
   2752 
   2753   <dd>
   2754 
   2755    <p>The Document Object Model (DOM) is a representation &mdash; a model &mdash; of a document and
   2756    its content. The DOM is not just an API; the conformance criteria of HTML implementations are
   2757    defined, in this specification, in terms of operations on the DOM. <a
   2758    href="#refsDOM">[DOM]</a></p>
   2759 
   2760    <p>Implementations must support DOM and the events defined in DOM Events, because this
   2761    specification is defined in terms of the DOM, and some of the features are defined as extensions
   2762    to the DOM interfaces. <a href="#refsDOM">[DOM]</a> <a href="#refsDOMEVENTS">[DOMEVENTS]</a></p>
   2763 
   2764    <p>In particular, the following features are defined in the DOM specification: <a
   2765    href="#refsDOM">[DOM]</a></p> <!-- aka DOM Core or DOMCORE -->
   2766 
   2767    <ul class="brief">
   2768 
   2769     <li><dfn><code>Attr</code></dfn> interface</li>
   2770     <li><dfn><code>Comment</code></dfn> interface</li>
   2771     <li><dfn><code>DOMImplementation</code></dfn> interface</li>
   2772     <li><dfn data-x="DOM Document"><code>Document</code></dfn> interface</li>
   2773     <li><dfn><code>XMLDocument</code></dfn> interface</li>
   2774     <li><dfn><code>DocumentFragment</code></dfn> interface</li>
   2775     <li><dfn><code>DocumentType</code></dfn> interface</li>
   2776     <li><dfn><code>DOMException</code></dfn> interface</li>
   2777     <li><dfn><code>ChildNode</code></dfn> interface</li>
   2778     <li><dfn><code>Element</code></dfn> interface</li>
   2779     <li><dfn><code>Node</code></dfn> interface</li>
   2780     <li><dfn><code>NodeList</code></dfn> interface</li>
   2781     <li><dfn><code>ProcessingInstruction</code></dfn> interface</li>
   2782     <li><dfn><code>Text</code></dfn> interface</li>
   2783 
   2784     <li><dfn><code>HTMLCollection</code></dfn> interface</li>
   2785     <li><dfn data-x="dom-HTMLCollection-item"><code>item()</code></dfn> method</li>
   2786     <li>The terms <dfn>collections</dfn> and <dfn>represented by the collection</dfn></li>
   2787 
   2788     <li><dfn><code>DOMTokenList</code></dfn> interface</li>
   2789     <li><dfn><code>DOMSettableTokenList</code></dfn> interface</li>
   2790 
   2791     <li><dfn data-x="dom-DOMImplementation-createDocument"><code>createDocument()</code></dfn> method</li>
   2792     <li><dfn data-x="dom-DOMImplementation-createHTMLDocument"><code>createHTMLDocument()</code></dfn> method</li>
   2793     <li><dfn data-x="dom-Document-createElement"><code>createElement()</code></dfn> method</li>
   2794     <li><dfn data-x="dom-Document-createElementNS"><code>createElementNS()</code></dfn> method</li>
   2795     <li><dfn data-x="dom-Document-getElementById"><code>getElementById()</code></dfn> method</li>
   2796     <li><dfn data-x="dom-Node-insertBefore"><code>insertBefore()</code></dfn> method</li>
   2797 
   2798     <li><dfn data-x="dom-Node-ownerDocument"><code>ownerDocument</code></dfn> attribute</li>
   2799     <li><dfn data-x="dom-Node-childNodes"><code>childNodes</code></dfn> attribute</li>
   2800     <li><dfn data-x="dom-Node-localName"><code>localName</code></dfn> attribute</li>
   2801     <li><dfn data-x="dom-Node-parentNode"><code>parentNode</code></dfn> attribute</li>
   2802     <li><dfn data-x="dom-Node-namespaceURI"><code>namespaceURI</code></dfn> attribute</li>
   2803     <li><dfn data-x="dom-Element-tagName"><code>tagName</code></dfn> attribute</li>
   2804     <li><dfn data-x="dom-Element-id"><code>id</code></dfn> attribute</li>
   2805     <li><dfn><code>textContent</code></dfn> attribute</li>
   2806 
   2807     <li>The <dfn data-x="concept-node-insert">insert</dfn>, <dfn data-x="concept-node-append">append</dfn>, <dfn data-x="concept-node-remove">remove</dfn>, <dfn data-x="concept-node-replace">replace</dfn>, and <dfn data-x="concept-node-adopt">adopt</dfn> algorithms for nodes</li>
   2808     <li>The <dfn>nodes are inserted</dfn> and <dfn>nodes are removed</dfn> concepts</li>
   2809     <li>An element's <dfn data-x="concept-node-adopt-ext">adopting steps</dfn></li>
   2810     <li>The <dfn>attribute list</dfn> concept.</li>
   2811     <li>The <dfn data-x="concept-cd-data">data</dfn> of a text node.</li>
   2812 
   2813     <li><dfn><code>Event</code></dfn> interface</li>
   2814     <li><dfn><code>EventTarget</code></dfn> interface</li>
   2815     <li><dfn><code>EventInit</code></dfn> dictionary type</li>
   2816     <li><dfn data-x="dom-Event-target"><code>target</code></dfn> attribute</li>
   2817     <li><dfn data-x="dom-Event-isTrusted"><code>isTrusted</code></dfn> attribute</li>
   2818     <li>The <dfn data-x="concept-event-type">type</dfn> of an event</li>
   2819     <li>The concept of an <dfn data-x="concept-event-listener">event listener</dfn> and the <span data-x="concept-event-listener">event listeners</span> associated with an <code>EventTarget</code></li>
   2820     <li>The concept of a <dfn>target override</dfn></li>
   2821     <li>The concept of a regular <dfn>event parent</dfn> and a <dfn>cross-boundary event parent</dfn></li> <!-- see bug 18780 -->
   2822 
   2823     <li>The <dfn data-x="document's character encoding">encoding</dfn> (herein the <i>character encoding</i>) and <dfn data-x="concept-document-content-type">content type</dfn> of a <code>Document</code></li>
   2824     <li>The distinction between <dfn>XML documents</dfn> and <dfn>HTML documents</dfn></li>
   2825     <li>The terms <dfn>quirks mode</dfn>, <dfn>limited-quirks mode</dfn>, and <dfn>no-quirks mode</dfn></li>
   2826     <li>The algorithm to <dfn data-x="concept-node-clone">clone</dfn> a <code>Node</code>, and the concept of <dfn data-x="concept-node-clone-ext">cloning steps</dfn> used by that algorithm</li>
   2827     <li>The concept of <dfn>base URL change steps</dfn> and the definition of what happens when an element is <dfn>affected by a base URL change</dfn></li>
   2828     <li>The concept of an element's <dfn data-x="concept-id">unique identifier (ID)</dfn></li>
   2829 
   2830     <li>The concept of a DOM <dfn data-x="concept-range">range</dfn>, and the terms <dfn data-x="concept-range-start">start</dfn>, <dfn data-x="concept-range-end">end</dfn>, and <dfn data-x="concept-range-bp">boundary point</dfn> as applied to ranges.</li>
   2831 
   2832     <li><dfn><code>MutationObserver</code></dfn> interface</li>
   2833     <li>The <dfn data-x="concept-mo-invoke">invoke <code>MutationObserver</code> objects</dfn> algorithm</li>
   2834 
   2835     <li><dfn>Promise</dfn> interface</li>
   2836     <li>The <dfn data-x="concept-resolver">resolver</dfn> concept</li>
   2837     <li>The <dfn data-x="concept-resolver-fulfill">fulfill</dfn> and <dfn data-x="concept-resolver-reject">reject</dfn> algorithms</li>
   2838 
   2839    </ul>
   2840 
   2841    <p>The term <dfn>throw</dfn> in this specification is used as defined in the DOM specification.
   2842    The following <code>DOMException</code> types are defined in the DOM specification: <a
   2843    href="#refsDOM">[DOM]</a></p>
   2844 
   2845    <ol class="brief">
   2846     <li value="1"><dfn><code>IndexSizeError</code></dfn></li>
   2847     <li value="3"><dfn><code>HierarchyRequestError</code></dfn></li>
   2848     <li value="4"><dfn><code>WrongDocumentError</code></dfn></li>
   2849     <li value="5"><dfn><code>InvalidCharacterError</code></dfn></li>
   2850     <li value="7"><dfn><code>NoModificationAllowedError</code></dfn></li>
   2851     <li value="8"><dfn><code>NotFoundError</code></dfn></li>
   2852     <li value="9"><dfn><code>NotSupportedError</code></dfn></li>
   2853     <li value="11"><dfn><code>InvalidStateError</code></dfn></li>
   2854     <li value="12"><dfn><code>SyntaxError</code></dfn></li>
   2855     <li value="13"><dfn><code>InvalidModificationError</code></dfn></li>
   2856     <li value="14"><dfn><code>NamespaceError</code></dfn></li>
   2857     <li value="15"><dfn><code>InvalidAccessError</code></dfn></li>
   2858     <li value="18"><dfn><code>SecurityError</code></dfn></li>
   2859     <li value="19"><dfn><code>NetworkError</code></dfn></li>
   2860     <li value="20"><dfn><code>AbortError</code></dfn></li>
   2861     <li value="21"><dfn><code>URLMismatchError</code></dfn></li>
   2862     <li value="22"><dfn><code>QuotaExceededError</code></dfn></li>
   2863     <li value="23"><dfn><code>TimeoutError</code></dfn></li>
   2864     <li value="24"><dfn><code>InvalidNodeTypeError</code></dfn></li>
   2865     <li value="25"><dfn><code>DataCloneError</code></dfn></li>
   2866    </ol>
   2867 
   2868    <p class="example">For example, to <i>throw a <code>TimeoutError</code> exception</i>, a user
   2869    agent would construct a <code>DOMException</code> object whose type was the string "<code
   2870    data-x="">TimeoutError</code>" (and whose code was the number 23, for legacy reasons) and
   2871    actually throw that object as an exception.</p>
   2872 
   2873    <p>The following features are defined in the DOM Events specification: <a
   2874    href="#refsDOMEVENTS">[DOMEVENTS]</a></p>
   2875 
   2876    <ul class="brief">
   2877 
   2878     <li><dfn><code>MouseEvent</code></dfn> interface</li>
   2879     <li><dfn><code>MouseEventInit</code></dfn> dictionary type</li>
   2880 
   2881     <li>The <dfn><code>FocusEvent</code></dfn> interface and its <dfn data-x="dom-FocusEvent-relatedTarget"><code>relatedTarget</code></dfn> attribute</li>
   2882 
   2883     <li>The <dfn><code>UIEvent</code></dfn> interface's <dfn data-x="dom-UIEvent-detail"><code>detail</code></dfn> attribute</li>
   2884 
   2885     <li><dfn data-x="event-click"><code>click</code></dfn> event</li>
   2886     <li><dfn data-x="event-dblclick"><code>dblclick</code></dfn> event</li>
   2887     <li><dfn data-x="event-mousedown"><code>mousedown</code></dfn> event</li>
   2888     <li><dfn data-x="event-mouseenter"><code>mouseenter</code></dfn> event</li>
   2889     <li><dfn data-x="event-mouseleave"><code>mouseleave</code></dfn> event</li>
   2890     <li><dfn data-x="event-mousemove"><code>mousemove</code></dfn> event</li>
   2891     <li><dfn data-x="event-mouseout"><code>mouseout</code></dfn> event</li>
   2892     <li><dfn data-x="event-mouseover"><code>mouseover</code></dfn> event</li>
   2893     <li><dfn data-x="event-mouseup"><code>mouseup</code></dfn> event</li>
   2894     <li><dfn data-x="event-mousewheel"><code>mousewheel</code></dfn> event</li>
   2895 
   2896     <li><dfn data-x="event-keydown"><code>keydown</code></dfn> event</li>
   2897     <li><dfn data-x="event-keyup"><code>keyup</code></dfn> event</li>
   2898     <li><dfn data-x="event-keypress"><code>keypress</code></dfn> event</li>
   2899 
   2900    </ul>
   2901 
   2902    <p>The following features are defined in the Touch Events specification: <a
   2903    href="#refsTOUCH">[TOUCH]</a></p>
   2904 
   2905    <ul class="brief">
   2906 
   2907     <li><dfn><code>Touch</code></dfn> interface</li>
   2908 
   2909     <li><dfn>Touch point</dfn> concept</li>
   2910 
   2911    </ul>
   2912 
   2913    <p>This specification sometimes uses the term <dfn data-x="">name</dfn> to refer to the event's
   2914    <code data-x="dom-event-type">type</code>; as in, "an event named <code data-x="">click</code>"
   2915    or "if the event name is <code data-x="">keypress</code>". The terms "name" and "type" for
   2916    events are synonymous.</p>
   2917 
   2918    <p>The following features are defined in the DOM Parsing and Serialisation specification: <a
   2919    href="#refsDOMPARSING">[DOMPARSING]</a></p>
   2920 
   2921    <ul class="brief">
   2922     <li><dfn data-x="dom-innerHTML"><code>innerHTML</code></dfn></li>
   2923     <li><dfn data-x="dom-outerHTML"><code>outerHTML</code></dfn></li>
   2924    </ul>
   2925 
   2926    <p class="note">User agents are also encouraged to implement the features described in the
   2927    <cite>HTML Editing APIs</cite> and <cite><code>UndoManager</code> and DOM Transaction</cite>
   2928    specifications.
   2929    <a href="#refsEDITING">[EDITING]</a>
   2930    <a href="#refsUNDO">[UNDO]</a>
   2931    </p>
   2932 
   2933    <p>The following parts of the Fullscreen specification are referenced from this specification,
   2934    in part to define the rendering of <code>dialog</code> elements, and also to define how the
   2935    Fullscreen API interacts with the sandboxing features in HTML: <a
   2936    href="#refsFULLSCREEN">[FULLSCREEN]</a></p>
   2937 
   2938    <ul class="brief">
   2939     <li>The <dfn>top layer</dfn> concept</li>
   2940     <li><dfn data-x="dom-element-requestFullscreen"><code>requestFullscreen()</code></dfn></li>
   2941     <li>The <dfn>fullscreen enabled flag</dfn></li>
   2942     <li>The <dfn>fully exit fullscreen</dfn> algorithm</li>
   2943    </ul>
   2944 
   2945   </dd>
   2946 
   2947 
   2948 
   2949   <dt>File API</dt>
   2950 
   2951   <dd>
   2952 
   2953    <p>This specification uses the following features defined in the File API specification: <a
   2954    href="#refsFILEAPI">[FILEAPI]</a></p>
   2955 
   2956    <ul class="brief">
   2957 
   2958     <li><dfn><code>Blob</code></dfn></li>
   2959     <li><dfn><code>File</code></dfn></li>
   2960     <li><dfn><code>FileList</code></dfn></li>
   2961     <li><dfn data-x="dom-Blob-close"><code>Blob.close()</code></dfn></li>
   2962     <li><dfn data-x="dom-Blob-type"><code>Blob.type</code></dfn></li>
   2963     <li>The concept of <dfn data-x="file-error-read">read errors</dfn></li>
   2964    </ul>
   2965 
   2966   </dd>
   2967 
   2968 
   2969   <dt>XMLHttpRequest</dt>
   2970 
   2971   <dd>
   2972 
   2973    <p>This specification references the XMLHttpRequest specification to describe how the two
   2974    specifications interact and to use its <code>ProgressEvent</code> features. The following
   2975    features and terms are defined in the XMLHttpRequest specification: <a
   2976    href="#refsXHR">[XHR]</a></p>
   2977 
   2978    <ul class="brief">
   2979 
   2980     <li><dfn><code>XMLHttpRequest</code></dfn>
   2981     <li><dfn><code>ProgressEvent</code></dfn>
   2982     <li><dfn data-x="fire a progress event">Fire a progress event named <var data-x="">e</var></dfn>
   2983 
   2984    </ul>
   2985 
   2986   </dd>
   2987 
   2988 
   2989 <!--TOPIC:HTML-->
   2990 
   2991   <dt>Media Queries</dt>
   2992 
   2993   <dd>
   2994 
   2995    <p>Implementations must support the Media Queries language. <a href="#refsMQ">[MQ]</a></p>
   2996 
   2997   </dd>
   2998 
   2999 
   3000   <dt>CSS modules</dt>
   3001 
   3002   <dd>
   3003 
   3004    <p>While support for CSS as a whole is not required of implementations of this specification
   3005    (though it is encouraged, at least for Web browsers), some features are defined in terms of
   3006    specific CSS requirements.</p>
   3007 
   3008    <p>In particular, some features require that a string be <dfn>parsed as a CSS &lt;color&gt;
   3009    value</dfn>. When parsing a CSS value, user agents are required by the CSS specifications to
   3010    apply some error handling rules. These apply to this specification also. <a
   3011    href="#refsCSSCOLOR">[CSSCOLOR]</a> <a href="#refsCSS">[CSS]</a></p>
   3012 
   3013    <p class="example">For example, user agents are required to close all open constructs upon
   3014    finding the end of a style sheet unexpectedly. Thus, when parsing the string "<code
   3015    data-x="">rgb(0,0,0</code>" (with a missing close-parenthesis) for a colour value, the close
   3016    parenthesis is implied by this error handling rule, and a value is obtained (the colour 'black').
   3017    However, the similar construct "<code data-x="">rgb(0,0,</code>" (with both a missing parenthesis
   3018    and a missing "blue" value) cannot be parsed, as closing the open construct does not result in a
   3019    viable value.</p>
   3020 
   3021    <p>The term <dfn>CSS element reference identifier</dfn> is used as defined in the <cite>CSS
   3022    Image Values and Replaced Content</cite> specification to define the API that declares
   3023    identifiers for use with the CSS 'element()' function. <a
   3024    href="#refsCSSIMAGES">[CSSIMAGES]</a></p>
   3025 
   3026    <p>Similarly, the term <dfn>provides a paint source</dfn> is used as defined in the <cite>CSS
   3027    Image Values and Replaced Content</cite> specification to define the interaction of certain HTML
   3028    elements with the CSS 'element()' function. <a href="#refsCSSIMAGES">[CSSIMAGES]</a></p>
   3029 
   3030    <p>The term <dfn>default object size</dfn> is also defined in the <cite>CSS Image Values and
   3031    Replaced Content</cite> specification. <a href="#refsCSSIMAGES">[CSSIMAGES]</a></p>
   3032 
   3033    <p>Implementations that support scripting must support the CSS Object Model. The following
   3034    features and terms are defined in the CSSOM specifications: <a href="#refsCSSOM">[CSSOM]</a> <a
   3035    href="#refsCSSOMVIEW">[CSSOMVIEW]</a></p>
   3036 
   3037    <ul class="brief">
   3038     <li><dfn><code>Screen</code></dfn></li>
   3039     <li><dfn><code>LinkStyle</code></dfn></li>
   3040     <li><dfn><code>CSSStyleDeclaration</code></dfn></li>
   3041     <li><dfn data-x="dom-CSSStyleDeclaration-cssText"><code>cssText</code></dfn> attribute of <code>CSSStyleDeclaration</code></li>
   3042     <li><dfn><code>StyleSheet</code></dfn></li>
   3043     <li>The terms <dfn>create a CSS style sheet</dfn>, <dfn>remove a CSS style sheet</dfn>, and <dfn>associated CSS style sheet</dfn></li>
   3044     <li><dfn data-x="CSS style sheet">CSS style sheets</dfn> and their properties:
   3045         <dfn data-x="concept-css-style-sheet-type">type</dfn>,
   3046         <dfn data-x="concept-css-style-sheet-location">location</dfn>,
   3047         <dfn data-x="concept-css-style-sheet-parent-CSS-style-sheet">parent CSS style sheet</dfn>,
   3048         <dfn data-x="concept-css-style-sheet-owner-node">owner node</dfn>,
   3049         <dfn data-x="concept-css-style-sheet-owner-CSS-rule">owner CSS rule</dfn>,
   3050         <dfn data-x="concept-css-style-sheet-media">media</dfn>,
   3051         <dfn data-x="concept-css-style-sheet-title">title</dfn>,
   3052         <dfn data-x="concept-css-style-sheet-alternate-flag">alternate flag</dfn>,
   3053         <dfn data-x="concept-css-style-sheet-disabeld-flag">disabled flag</dfn>,
   3054         <dfn data-x="concept-css-style-sheet-CSS-rules">CSS rules</dfn>,
   3055         <dfn data-x="concept-css-style-sheet-origin-clean-flag">origin-clean flag</dfn>
   3056     </li>
   3057     <li><dfn>Alternative style sheet sets</dfn> and the <dfn>preferred style sheet set</dfn></li>
   3058     <li><dfn>Serializing a CSS value</dfn></li>
   3059     <li><dfn>Scroll an element into view</dfn></li>
   3060     <li><dfn>Scroll to the beginning of the document</dfn></li>
   3061     <li>The <dfn data-x="event-resize"><code>resize</code></dfn> event</li>
   3062     <li>The <dfn data-x="event-scroll"><code>scroll</code></dfn> event</li>
   3063    </ul>
   3064 
   3065    <p>The term <dfn>environment encoding</dfn> is defined in the <cite>CSS Syntax</cite>
   3066    specification. <a href="#refsCSSSYNTAX">[CSSSYNTAX]</a></p>
   3067 
   3068    <p>The term <dfn>CSS styling attribute</dfn> is defined in the <cite>CSS Style Attributes</cite>
   3069    specification. <a href="#refsCSSATTR">[CSSATTR]</a></p>
   3070 
   3071    <p>The <code>CanvasRenderingContext2D</code> object's use of fonts depends on the features
   3072    described in the CSS <cite>Fonts</cite> and <cite>Font Load Events</cite> specifications, including in particular
   3073    <dfn><code>FontLoader</code></dfn>. <a href="#refsCSSFONTS">[CSSFONTS]</a> <a
   3074    href="#refsCSSFONTLOAD">[CSSFONTLOAD]</a></p>
   3075 
   3076   </dd>
   3077 
   3078 
   3079 <!--TOPIC:Canvas-->
   3080 
   3081   <dt>SVG</dt>
   3082 
   3083   <dd>
   3084 
   3085    <p>The following interface is defined in the SVG specification: <a href="#refsSVG">[SVG]</a></p>
   3086 
   3087    <ul class="brief">
   3088     <li><dfn><code>SVGMatrix</code></dfn>
   3089    </ul>
   3090 
   3091    <!-- mention that the parser supports it? -->
   3092 
   3093   </dd>
   3094 
   3095 
   3096   <dt>WebGL</dt>
   3097 
   3098   <dd>
   3099 
   3100    <p>The following interface is defined in the WebGL specification: <a
   3101    href="#refsWEBGL">[WEBGL]</a></p>
   3102 
   3103    <ul class="brief">
   3104     <li><dfn><code>WebGLRenderingContext</code></dfn>
   3105    </ul>
   3106 
   3107   </dd>
   3108 
   3109 
   3110 <!--TOPIC:HTML-->
   3111 
   3112   <!-- mention that the parser supports mathml? -->
   3113 
   3114 
   3115 <!--TOPIC:Video Text Tracks-->
   3116 
   3117   <dt>WebVTT</dt>
   3118 
   3119   <dd>
   3120 
   3121    <p>Implementations may support <dfn>WebVTT</dfn> as a text track format for subtitles, captions,
   3122    chapter titles, metadata, etc, for media resources. <a href="#refsWEBVTT">[WEBVTT]</a></p>
   3123 
   3124    <p>The following terms, used in this specification, are defined in the WebVTT specification:</p>
   3125 
   3126    <ul class="brief">
   3127     <li><dfn>WebVTT file</dfn>
   3128     <li><dfn>WebVTT file using cue text</dfn>
   3129     <li><dfn>WebVTT file using chapter title text</dfn>
   3130     <li><dfn>WebVTT file using only nested cues</dfn>
   3131     <li><dfn>WebVTT parser</dfn>
   3132     <li>The <dfn>rules for updating the display of WebVTT text tracks</dfn>
   3133     <li>The <dfn>rules for interpreting WebVTT cue text</dfn>
   3134     <li>The WebVTT <dfn>text track cue writing direction</dfn>
   3135    </ul>
   3136 
   3137   </dd>
   3138 
   3139 
   3140 <!--TOPIC:WebSocket API-->
   3141 
   3142   <dt>The WebSocket protocol</dt>
   3143 
   3144   <dd>
   3145 
   3146    <p>The following terms are defined in the WebSocket protocol specification: <a
   3147    href="#refsWSP">[WSP]</a></p>
   3148 
   3149    <ul class="brief">
   3150 
   3151     <li><dfn>establish a WebSocket connection</dfn>
   3152     <li><dfn>the WebSocket connection is established</dfn>
   3153     <li><dfn>validate the server's response</dfn>
   3154     <li><dfn>extensions in use</dfn>
   3155     <li><dfn>subprotocol in use</dfn>
   3156     <li><dfn>headers to send appropriate cookies</dfn>
   3157     <li><dfn>cookies set during the server's opening handshake</dfn>
   3158     <li><dfn>a WebSocket message has been received</dfn>
   3159     <li><dfn>send a WebSocket Message</dfn>
   3160     <li><dfn>fail the WebSocket connection</dfn>
   3161     <li><dfn>close the WebSocket connection</dfn>
   3162     <li><dfn>start the WebSocket closing handshake</dfn>
   3163     <li><dfn>the WebSocket closing handshake is started</dfn>
   3164     <li><dfn>the WebSocket connection is closed</dfn> (possibly <i data-x="">cleanly</i>)
   3165     <li><dfn>the WebSocket connection close code</dfn>
   3166     <li><dfn>the WebSocket connection close reason</dfn>
   3167 
   3168    </ul>
   3169 
   3170   </dd>
   3171 
   3172 
   3173 <!--TOPIC:HTML-->
   3174 
   3175   <dt>ARIA</dt>
   3176 
   3177   <dd>
   3178 
   3179    <p>The term <dfn>strong native semantics</dfn> is used as defined in the ARIA specification.
   3180    The term <dfn>default implicit ARIA semantics</dfn> has the same meaning as the term <i>implicit
   3181    WAI-ARIA semantics</i> as used in the ARIA specification. <a href="#refsARIA">[ARIA]</a></p>
   3182 
   3183    <p>The <dfn data-x="attr-aria-role"><code>role</code></dfn> and <code data-x="">aria-*</code>
   3184    attributes are defined in the ARIA specification. <a href="#refsARIA">[ARIA]</a></p>
   3185 
   3186 
   3187   </dd>
   3188 
   3189 
   3190  </dl>
   3191 
   3192  <p>This specification does not <em>require</em> support of any particular network protocol, style
   3193  sheet language, scripting language, or any of the DOM specifications beyond those required in the
   3194  list above. However, the language described by this specification is biased towards CSS as the
   3195  styling language, JavaScript as the scripting language, and HTTP as the network protocol, and
   3196  several features assume that those languages and protocols are in use.</p>
   3197 
   3198  <p>A user agent that implements the HTTP protocol must implement the Web Origin Concept
   3199  specification and the HTTP State Management Mechanism specification (Cookies) as well. <a
   3200  href="#refsHTTP">[HTTP]</a> <a href="#refsORIGIN">[ORIGIN]</a> <a
   3201  href="#refsCOOKIES">[COOKIES]</a></p>
   3202 
   3203  <p class="note">This specification might have certain additional requirements on character
   3204  encodings, image formats, audio formats, and video formats in the respective sections.</p>
   3205 
   3206  </div>
   3207 
   3208  </div>
   3209 
   3210 
   3211  <h4>Extensibility</h4>
   3212 
   3213  <p>Vendor-specific proprietary user agent extensions to this specification are strongly
   3214  discouraged. Documents must not use such extensions, as doing so reduces interoperability and
   3215  fragments the user base, allowing only users of specific user agents to access the content in
   3216  question.</p>
   3217 
   3218  <div class="nodev">
   3219 
   3220  <p>If such extensions are nonetheless needed, e.g. for experimental purposes, then vendors are
   3221  strongly urged to use one of the following extension mechanisms:</p>
   3222 
   3223  <ul>
   3224 
   3225   <li><p>For markup-level features that can be limited to the XML serialisation and need not be
   3226   supported in the HTML serialisation, vendors should use the namespace mechanism to define custom
   3227   namespaces in which the non-standard elements and attributes are supported.</p>
   3228 
   3229   <li>
   3230 
   3231    <p>For markup-level features that are intended for use with <span>the HTML syntax</span>,
   3232    extensions should be limited to new attributes of the form "<code data-x="">x-<var
   3233    data-x="">vendor</var>-<var data-x="">feature</var></code>", where <var data-x="">vendor</var> is a
   3234    short string that identifies the vendor responsible for the extension, and <var
   3235    data-x="">feature</var> is the name of the feature. New element names should not be created.
   3236    Using attributes for such extensions exclusively allows extensions from multiple vendors to
   3237    co-exist on the same element, which would not be possible with elements. Using the "<code
   3238    data-x="">x-<var data-x="">vendor</var>-<var data-x="">feature</var></code>" form allows extensions
   3239    to be made without risk of conflicting with future additions to the specification.</p>
   3240 
   3241    <div class="example">
   3242 
   3243     <p>For instance, a browser named "FerretBrowser" could use "ferret" as a vendor prefix, while a
   3244     browser named "Mellblom Browser" could use "mb". If both of these browsers invented extensions
   3245     that turned elements into scratch-and-sniff areas, an author experimenting with these features
   3246     could write:</p>
   3247 
   3248     <pre>&lt;p>This smells of lemons!
   3249 &lt;span x-ferret-smellovision x-ferret-smellcode="LEM01"
   3250      x-mb-outputsmell x-mb-smell="lemon juice">&lt;/span>&lt;/p></pre>
   3251 
   3252    </div>
   3253 
   3254   </li>
   3255 
   3256  </ul>
   3257 
   3258  <p>Attribute names beginning with the two characters "<code data-x="">x-</code>" are reserved for
   3259  user agent use and are guaranteed to never be formally added to the HTML language. For
   3260  flexibility, attribute names containing underscores (the U+005F LOW LINE character) are also
   3261  reserved for experimental purposes and are guaranteed to never be formally added to the HTML
   3262  language.</p>
   3263 
   3264  <p class="note">Pages that use such attributes are by definition non-conforming.</p>
   3265 
   3266  <p>For DOM extensions, e.g. new methods and IDL attributes, the new members should be prefixed by
   3267  vendor-specific strings to prevent clashes with future versions of this specification.</p>
   3268 
   3269  <p>For events, experimental event types should be prefixed with vendor-specific strings.</p>
   3270 
   3271  <div class="example">
   3272 
   3273   <p>For example, if a user agent called "Pleas<!--e h-->old" were to add an event to indicate when
   3274   the user is going up in an elevator, it could use the prefix "<code data-x="">pleasold</code>" and
   3275   thus name the event "<code data-x="">pleasoldgoingup</code>", possibly with an event handler
   3276   attribute named "<code data-x="">onpleasoldgoingup</code>".</p>
   3277 
   3278  </div>
   3279 
   3280  <p>All extensions must be defined so that the use of extensions neither contradicts nor causes the
   3281  non-conformance of functionality defined in the specification.</p> <!-- thanks to QA Framework -->
   3282 
   3283  <div class="example">
   3284 
   3285   <p>For example, while strongly discouraged from doing so, an implementation "Foo Browser" could
   3286   add a new IDL attribute "<code data-x="">fooTypeTime</code>" to a control's DOM interface that
   3287   returned the time it took the user to select the current value of a control (say). On the other
   3288   hand, defining a new control that appears in a form's <code
   3289   data-x="dom-form-elements">elements</code> array would be in violation of the above requirement,
   3290   as it would violate the definition of <code data-x="dom-form-elements">elements</code> given in
   3291   this specification.</p>
   3292 
   3293  </div>
   3294 
   3295  <p>When adding new <span data-x="reflect">reflecting</span> IDL attributes corresponding to content
   3296  attributes of the form "<code data-x="">x-<var data-x="">vendor</var>-<var
   3297  data-x="">feature</var></code>", the IDL attribute should be named "<code data-x=""><var
   3298  data-x="">vendor</var><var data-x="">Feature</var></code>" (i.e. the "<code data-x="">x</code>" is
   3299  dropped from the IDL attribute's name).</p>
   3300 
   3301  </div>
   3302 
   3303  <hr>
   3304 
   3305  <p>When vendor-neutral extensions to this specification are needed, either this specification can
   3306  be updated accordingly, or an extension specification can be written that overrides the
   3307  requirements in this specification. When someone applying this specification to their activities
   3308  decides that they will recognise the requirements of such an extension specification, it becomes
   3309  an <dfn data-x="other applicable specifications">applicable specification</dfn> for the purposes of
   3310  conformance requirements in this specification.</p>
   3311 
   3312  <p class="note">Someone could write a specification that defines any arbitrary byte stream as
   3313  conforming, and then claim that their random junk is conforming. However, that does not mean that
   3314  their random junk actually is conforming for everyone's purposes: if someone else decides that
   3315  that specification does not apply to their work, then they can quite legitimately say that the
   3316  aforementioned random junk is just that, junk, and not conforming at all. As far as conformance
   3317  goes, what matters in a particular community is what that community <em>agrees</em> is
   3318  applicable.</p>
   3319 
   3320  <div class="nodev">
   3321 
   3322  <hr>
   3323 
   3324  <p>User agents must treat elements and attributes that they do not understand as semantically
   3325  neutral; leaving them in the DOM (for DOM processors), and styling them according to CSS (for CSS
   3326  processors), but not inferring any meaning from them.</p>
   3327 
   3328 <!--ADD-TOPIC:Security-->
   3329  <p>When support for a feature is disabled (e.g. as an emergency measure to mitigate a security
   3330  problem, or to aid in development, or for performance reasons), user agents must act as if they
   3331  had no support for the feature whatsoever, and as if the feature was not mentioned in this
   3332  specification. For example, if a particular feature is accessed via an attribute in a Web IDL
   3333  interface, the attribute itself would be omitted from the objects that implement that interface
   3334  &mdash; leaving the attribute on the object but making it return null or throw an exception is
   3335  insufficient.</p>
   3336 <!--REMOVE-TOPIC:Security-->
   3337 
   3338  </div>
   3339 
   3340 
   3341  <div class="nodev">
   3342 
   3343  <h4>Interactions with XPath and XSLT</h4>
   3344 
   3345  <p id="xpath-1.0-processors">Implementations of XPath 1.0 that operate on <span>HTML
   3346  documents</span> parsed or created in the manners described in this specification (e.g. as part of
   3347  the <code data-x="">document.evaluate()</code> API) must act as if the following edit was applied
   3348  to the XPath 1.0 specification.</p>
   3349 
   3350  <p>First, remove this paragraph:</p>
   3351 
   3352  <blockquote cite="http://www.w3.org/TR/1999/REC-xpath-19991116#node-tests">
   3353 
   3354   <p>A <a href="http://www.w3.org/TR/REC-xml-names#NT-QName">QName</a> in the node test is expanded
   3355   into an <a href="http://www.w3.org/TR/1999/REC-xpath-19991116#dt-expanded-name">expanded-name</a>
   3356   using the namespace declarations from the expression context. This is the same way expansion is
   3357   done for element type names in start and end-tags except that the default namespace declared with
   3358   <code data-x="">xmlns</code> is not used: if the <a
   3359   href="http://www.w3.org/TR/REC-xml-names#NT-QName">QName</a> does not have a prefix, then the
   3360   namespace URI is null (this is the same way attribute names are expanded). It is an error if the
   3361   <a href="http://www.w3.org/TR/REC-xml-names#NT-QName">QName</a> has a prefix for which there is
   3362   no namespace declaration in the expression context.</p>
   3363 
   3364  </blockquote>
   3365 
   3366  <p>Then, insert in its place the following:</p>
   3367 
   3368  <blockquote cite="http://www.w3.org/Bugs/Public/show_bug.cgi?id=7059#c37">
   3369 
   3370   <p>A QName in the node test is expanded into an expanded-name using the namespace declarations
   3371   from the expression context. If the QName has a prefix, then there must be a<!-- added 2009-10-27
   3372   - http://www.w3.org/Bugs/Public/show_bug.cgi?id=8062 --> namespace declaration for this prefix in
   3373   the expression context, and the corresponding<!-- typo fixed 2009-10-27 -
   3374   http://www.w3.org/Bugs/Public/show_bug.cgi?id=8063 --> namespace URI is the one that is
   3375   associated with this prefix. It is an error if the QName has a prefix for which there is no
   3376   namespace declaration in the expression context. </p>
   3377 
   3378   <p>If the QName has no prefix and the principal node type of the axis is element, then the
   3379   default element namespace is used. Otherwise if the QName has no prefix, the namespace URI is
   3380   null. The default element namespace is a member of the context for the XPath expression. The
   3381   value of the default element namespace when executing an XPath expression through the DOM3 XPath
   3382   API is determined in the following way:</p>
   3383 
   3384   <ol>
   3385 
   3386    <li>If the context node is from an HTML DOM, the default element namespace is
   3387    "http://www.w3.org/1999/xhtml".</li>
   3388 
   3389    <li>Otherwise, the default element namespace URI is null.</li>
   3390 
   3391   </ol>
   3392 
   3393   <p class="note">This is equivalent to adding the default element namespace feature of XPath 2.0
   3394   to XPath 1.0, and using the HTML namespace as the default element namespace for HTML documents.
   3395   It is motivated by the desire to have implementations be compatible with legacy HTML content
   3396   while still supporting the changes that this specification introduces to HTML regarding the
   3397   namespace used for HTML elements, and by the desire to use XPath 1.0 rather than XPath 2.0.</p>
   3398 
   3399  </blockquote>
   3400 
   3401  <p class="note">This change is a <span>willful violation</span> of the XPath 1.0 specification,
   3402  motivated by the desire to have implementations be compatible with legacy content while still
   3403  supporting the changes that this specification introduces to HTML regarding which namespace is
   3404  used for HTML elements. <a href="#refsXPATH10">[XPATH10]</a></p> <!-- note: version matters for
   3405  this ref -->
   3406 
   3407  <hr>
   3408 
   3409  <p id="dom-based-xslt-1.0-processors">XSLT 1.0 processors outputting to a DOM when the output
   3410  method is "html" (either explicitly or via the defaulting rule in XSLT 1.0) are affected as
   3411  follows:</p>
   3412 
   3413  <p>If the transformation program outputs an element in no namespace, the processor must, prior to
   3414  constructing the corresponding DOM element node, change the namespace of the element to the
   3415  <span>HTML namespace</span>, <span data-x="converted to ASCII lowercase">ASCII-lowercase</span> the
   3416  element's local name, and <span data-x="converted to ASCII lowercase">ASCII-lowercase</span> the
   3417  names of any non-namespaced attributes on the element.</p>
   3418 
   3419  <p class="note">This requirement is a <span>willful violation</span> of the XSLT 1.0
   3420  specification, required because this specification changes the namespaces and case-sensitivity
   3421  rules of HTML in a manner that would otherwise be incompatible with DOM-based XSLT
   3422  transformations. (Processors that serialise the output are unaffected.) <a
   3423  href="#refsXSLT10">[XSLT10]</a></p> <!-- note: version matters for this ref -->
   3424 
   3425  <hr>
   3426 
   3427  <p>This specification does not specify precisely how XSLT processing interacts with the <span>HTML
   3428  parser</span> infrastructure (for example, whether an XSLT processor acts as if it puts any
   3429  elements into a <span>stack of open elements</span>). However, XSLT processors must <span>stop
   3430  parsing</span> if they successfully complete, and must set the <span>current document
   3431  readiness</span> first to "<code data-x="">interactive</code>"<!-- this synchronously fires an
   3432  event --> and then to "<code data-x="">complete</code>"<!-- this also synchronously fires an event
   3433  --> if they are aborted.</p>
   3434 
   3435  <hr>
   3436 
   3437  <p>This specification does not specify how XSLT interacts with the <span
   3438  data-x="navigate">navigation</span> algorithm, how it fits in with the <span>event loop</span>, nor
   3439  how error pages are to be handled (e.g. whether XSLT errors are to replace an incremental XSLT
   3440  output, or are rendered inline, etc).</p>
   3441 
   3442  <p class="note">There are also additional non-normative comments regarding the interaction of XSLT
   3443  and HTML <a href="#scriptTagXSLT">in the <code>script</code> element section</a>, and of
   3444  XSLT, XPath, and HTML <a href="#template-XSLT-XPath">in the <code>template</code> element
   3445  section</a>.</p>
   3446 
   3447  </div>
   3448 
   3449 
   3450 
   3451 
   3452  <h3>Case-sensitivity and string comparison</h3>
   3453 
   3454  <p>Comparing two strings in a <dfn>case-sensitive</dfn> manner means comparing them exactly, code
   3455  point for code point.</p>
   3456 
   3457  <p>Comparing two strings in an <dfn>ASCII case-insensitive</dfn> manner means comparing them
   3458  exactly, code point for code point, except that the characters in the range U+0041 to U+005A (i.e.
   3459  LATIN CAPITAL LETTER A to LATIN CAPITAL LETTER Z) and the corresponding characters in the range
   3460  U+0061 to U+007A (i.e. LATIN SMALL LETTER A to LATIN SMALL LETTER Z) are considered to also
   3461  match.</p>
   3462 
   3463  <p>Comparing two strings in a <dfn>compatibility caseless</dfn> manner means using the Unicode
   3464  <i>compatibility caseless match</i> operation to compare the two strings, with no language-specific tailorings. <a
   3465  href="#refsUNICODE">[UNICODE]</a></p>
   3466 
   3467  <p>Except where otherwise stated, string comparisons must be performed in a
   3468  <span>case-sensitive</span> manner.</p>
   3469 
   3470 
   3471  <div class="nodev">
   3472 
   3473  <p><dfn data-x="converted to ASCII uppercase">Converting a string to ASCII uppercase</dfn> means
   3474  replacing all characters in the range U+0061 to U+007A (i.e. LATIN SMALL LETTER A to LATIN SMALL
   3475  LETTER Z) with the corresponding characters in the range U+0041 to U+005A (i.e. LATIN CAPITAL
   3476  LETTER A to LATIN CAPITAL LETTER Z).</p>
   3477 
   3478  <p><dfn data-x="converted to ASCII lowercase">Converting a string to ASCII lowercase</dfn> means
   3479  replacing all characters in the range U+0041 to U+005A (i.e. LATIN CAPITAL LETTER A to LATIN
   3480  CAPITAL LETTER Z) with the corresponding characters in the range U+0061 to U+007A (i.e. LATIN
   3481  SMALL LETTER A to LATIN SMALL LETTER Z).</p>
   3482 
   3483  </div>
   3484 
   3485 
   3486  <p>A string <var data-x="">pattern</var> is a <dfn>prefix match</dfn> for a string <var
   3487  data-x="">s</var> when <var data-x="">pattern</var> is not longer than <var data-x="">s</var> and
   3488  truncating <var data-x="">s</var> to <var data-x="">pattern</var>'s length leaves the two strings as
   3489  matches of each other.</p>
   3490 
   3491 
   3492 
   3493  <h3>Common microsyntaxes</h3>
   3494 
   3495  <p>There are various places in HTML that accept particular data types, such as dates or numbers.
   3496  This section describes what the conformance criteria for content in those formats are, and how to
   3497  parse them.</p>
   3498 
   3499  <div class="nodev">
   3500 
   3501  <p class="note">Implementors are strongly urged to carefully examine any third-party libraries
   3502  they might consider using to implement the parsing of syntaxes described below. For example, date
   3503  libraries are likely to implement error handling behavior that differs from what is required in
   3504  this specification, since error-handling behavior is often not defined in specifications that
   3505  describe date syntaxes similar to those used in this specification, and thus implementations tend
   3506  to vary greatly in how they handle errors.</p>
   3507 
   3508  </div>
   3509 
   3510 
   3511  <div class="nodev">
   3512 
   3513  <h4>Common parser idioms</h4>
   3514 
   3515  </div>
   3516 
   3517  <p>The <dfn data-x="space character">space characters</dfn>, for the purposes of this
   3518  specification, are U+0020 SPACE, U+0009 CHARACTER TABULATION (tab), U+000A LINE FEED (LF), U+000C
   3519  FORM FEED (FF), and U+000D CARRIAGE RETURN (CR).</p>
   3520 
   3521  <p>The <dfn data-x="White_Space">White_Space characters</dfn> are those that have the Unicode
   3522  property "White_Space" in the Unicode <code data-x="">PropList.txt</code> data file. <a
   3523  href="#refsUNICODE">[UNICODE]</a></p>
   3524 
   3525  <p class="note">This should not be confused with the "White_Space" value (abbreviated "WS") of the
   3526  "Bidi_Class" property in the <code data-x="">UnicodeData.txt</code> data file.</p>
   3527 
   3528  <p>The <dfn>control characters</dfn> are those whose Unicode "General_Category" property has the
   3529  value "Cc" in the Unicode <code data-x="">UnicodeData.txt</code> data file. <a
   3530  href="#refsUNICODE">[UNICODE]</a></p>
   3531 
   3532  <p>The <dfn>uppercase ASCII letters</dfn> are the characters in the range U+0041 LATIN CAPITAL
   3533  LETTER A to U+005A LATIN CAPITAL LETTER Z.</p>
   3534 
   3535  <p>The <dfn>lowercase ASCII letters</dfn> are the characters in the range U+0061 LATIN SMALL
   3536  LETTER A to U+007A LATIN SMALL LETTER Z.</p>
   3537 
   3538  <p>The <dfn>ASCII digits</dfn> are the characters in the range U+0030 DIGIT ZERO (0) to U+0039
   3539  DIGIT NINE (9).</p>
   3540 
   3541  <p>The <dfn>alphanumeric ASCII characters</dfn> are those that are either <span>uppercase ASCII
   3542  letters</span>, <span>lowercase ASCII letters</span>, or <span>ASCII digits</span>.</p>
   3543 
   3544  <p>The <dfn>ASCII hex digits</dfn> are the characters in the ranges U+0030 DIGIT ZERO (0) to
   3545  U+0039 DIGIT NINE (9), U+0041 LATIN CAPITAL LETTER A to U+0046 LATIN CAPITAL LETTER F, and U+0061
   3546  LATIN SMALL LETTER A to U+0066 LATIN SMALL LETTER F.</p>
   3547 
   3548  <p>The <dfn>uppercase ASCII hex digits</dfn> are the characters in the ranges U+0030 DIGIT ZERO (0) to
   3549  U+0039 DIGIT NINE (9) and U+0041 LATIN CAPITAL LETTER A to U+0046 LATIN CAPITAL LETTER F only.</p>
   3550 
   3551  <p>The <dfn>lowercase ASCII hex digits</dfn> are the characters in the ranges U+0030 DIGIT ZERO
   3552  (0) to U+0039 DIGIT NINE (9) and U+0061 LATIN SMALL LETTER A to U+0066 LATIN SMALL LETTER F
   3553  only.</p>
   3554 
   3555  <div class="nodev">
   3556 
   3557  <p>Some of the micro-parsers described below follow the pattern of having an <var
   3558  data-x="">input</var> variable that holds the string being parsed, and having a <var
   3559  data-x="">position</var> variable pointing at the next character to parse in <var
   3560  data-x="">input</var>.</p>
   3561 
   3562  <p>For parsers based on this pattern, a step that requires the user agent to <dfn>collect a
   3563  sequence of characters</dfn> means that the following algorithm must be run, with <var
   3564  data-x="">characters</var> being the set of characters that can be collected:</p>
   3565 
   3566  <ol>
   3567 
   3568   <li><p>Let <var data-x="">input</var> and <var data-x="">position</var> be the same variables as
   3569   those of the same name in the algorithm that invoked these steps.</p></li>
   3570 
   3571   <li><p>Let <var data-x="">result</var> be the empty string.</p></li>
   3572 
   3573   <li><p>While <var data-x="">position</var> doesn't point past the end of <var data-x="">input</var>
   3574   and the character at <var data-x="">position</var> is one of the <var data-x="">characters</var>,
   3575   append that character to the end of <var data-x="">result</var> and advance <var
   3576   data-x="">position</var> to the next character in <var data-x="">input</var>.</p></li>
   3577 
   3578   <li><p>Return <var data-x="">result</var>.</p></li>
   3579 
   3580  </ol>
   3581 
   3582  <p>The step <dfn>skip whitespace</dfn> means that the user agent must <span>collect a sequence of
   3583  characters</span> that are <span data-x="space character">space characters</span>. The step
   3584  <dfn>skip White_Space characters</dfn> means that the user agent must <span>collect a sequence of
   3585  characters</span> that are <span>White_Space</span> characters. In both cases, the collected
   3586  characters are not used. <a href="#refsUNICODE">[UNICODE]</a></p>
   3587 
   3588  <p>When a user agent is to <dfn>strip line breaks</dfn> from a string, the user agent must remove
   3589  any U+000A LINE FEED (LF) and U+000D CARRIAGE RETURN (CR) characters from that string.</p>
   3590 
   3591  <p>When a user agent is to <dfn>strip leading and trailing whitespace</dfn> from a string, the
   3592  user agent must remove all <span data-x="space character">space characters</span> that are at the
   3593  start or end of the string.</p>
   3594 
   3595  <p>When a user agent is to <dfn>strip and collapse whitespace</dfn> in a string, it must replace
   3596  any sequence of one or more consecutive <span data-x="space character">space characters</span> in
   3597  that string with a single U+0020 SPACE character, and then <span>strip leading and trailing
   3598  whitespace</span> from that string.</p>
   3599 
   3600  <p>When a user agent has to <dfn>strictly split a string</dfn> on a particular delimiter character
   3601  <var data-x="">delimiter</var>, it must use the following algorithm:</p>
   3602 
   3603  <ol>
   3604 
   3605   <li><p>Let <var data-x="">input</var> be the string being parsed.</p></li>
   3606 
   3607   <li><p>Let <var data-x="">position</var> be a pointer into <var data-x="">input</var>, initially
   3608   pointing at the start of the string.</p></li>
   3609 
   3610   <li><p>Let <var data-x="">tokens</var> be an ordered list of tokens, initially empty.</p></li>
   3611 
   3612   <li><p>While <var data-x="">position</var> is not past the end of <var data-x="">input</var>:</p>
   3613 
   3614    <ol>
   3615 
   3616     <li><p><span>Collect a sequence of characters</span> that are not the <var
   3617     data-x="">delimiter</var> character.</p></li>
   3618 
   3619     <li><p>Append the string collected in the previous step to <var data-x="">tokens</var>.</p></li>
   3620 
   3621     <li><p>Advance <var data-x="">position</var> to the next character in <var
   3622     data-x="">input</var>.</p></li> <!-- skips past the delimiter -->
   3623 
   3624    </ol>
   3625 
   3626   </li>
   3627 
   3628   <li><p>Return <var data-x="">tokens</var>.</p></li>
   3629 
   3630  </ol>
   3631 
   3632  <p class="note">For the special cases of splitting a string <span data-x="split a string on
   3633  spaces">on spaces</span> and <span data-x="split a string on commas">on commas</span>, this
   3634  algorithm does not apply (those algorithms also perform <span data-x="strip leading and trailing
   3635  whitespace">whitespace trimming</span>).</p>
   3636 
   3637  </div>
   3638 
   3639 
   3640 
   3641  <h4>Boolean attributes</h4>
   3642 
   3643  <p>A number of attributes are <dfn data-x="boolean attribute">boolean attributes</dfn>. The
   3644  presence of a boolean attribute on an element represents the true value, and the absence of the
   3645  attribute represents the false value.</p>
   3646 
   3647  <p>If the attribute is present, its value must either be the empty string or a value that is an
   3648  <span>ASCII case-insensitive</span> match for the attribute's canonical name, with no leading or
   3649  trailing whitespace.</p>
   3650 
   3651  <p class="note">The values "true" and "false" are not allowed on boolean attributes. To represent
   3652  a false value, the attribute has to be omitted altogether.</p>
   3653 
   3654  <div class="example">
   3655 
   3656   <p>Here is an example of a checkbox that is checked and disabled. The <code
   3657   data-x="attr-input-checked">checked</code> and <code data-x="attr-fe-disabled">disabled</code>
   3658   attributes are the boolean attributes.</p>
   3659 
   3660   <pre>&lt;label>&lt;input type=checkbox checked name=cheese disabled> Cheese&lt;/label></pre>
   3661 
   3662   <p>This could be equivalently written as this:
   3663 
   3664   <pre>&lt;label>&lt;input type=checkbox checked=checked name=cheese disabled=disabled> Cheese&lt;/label></pre>
   3665 
   3666   <p>You can also mix styles; the following is still equivalent:</p>
   3667 
   3668   <pre>&lt;label>&lt;input type='checkbox' checked name=cheese disabled=""> Cheese&lt;/label></pre>
   3669 
   3670  </div>
   3671 
   3672 
   3673 
   3674  <h4>Keywords and enumerated attributes</h4>
   3675 
   3676  <p>Some attributes are defined as taking one of a finite set of keywords. Such attributes are
   3677  called <dfn data-x="enumerated attribute">enumerated attributes</dfn>. The keywords are each
   3678  defined to map to a particular <em>state</em> (several keywords might map to the same state, in
   3679  which case some of the keywords are synonyms of each other; additionally, some of the keywords can
   3680  be said to be non-conforming, and are only in the specification for historical reasons). In
   3681  addition, two default states can be given. The first is the <i>invalid value default</i>, the
   3682  second is the <i>missing value default</i>.</p>
   3683 
   3684  <p>If an enumerated attribute is specified, the attribute's value must be an <span>ASCII
   3685  case-insensitive</span> match for one of the given keywords that are not said to be
   3686  non-conforming, with no leading or trailing whitespace.</p>
   3687 
   3688  <p>When the attribute is specified, if its value is an <span>ASCII case-insensitive</span> match
   3689  for one of the given keywords then that keyword's state is the state that the attribute
   3690  represents. If the attribute value matches none of the given keywords, but the attribute has an
   3691  <i>invalid value default</i>, then the attribute represents that state. Otherwise, if the
   3692  attribute value matches none of the keywords but there is a <i>missing value default</i> state
   3693  defined, then <em>that</em> is the state represented by the attribute. Otherwise, there is no
   3694  default, and invalid values mean that there is no state represented.</p>
   3695 
   3696  <p>When the attribute is <em>not</em> specified, if there is a <i>missing value default</i> state
   3697  defined, then that is the state represented by the (missing) attribute. Otherwise, the absence of
   3698  the attribute means that there is no state represented.</p>
   3699 
   3700  <p class="note">The empty string can be a valid keyword.</p>
   3701 
   3702 
   3703  <h4>Numbers</h4>
   3704 
   3705  <h5>Signed integers</h5>
   3706 
   3707  <p>A string is a <dfn>valid integer</dfn> if it consists of one or more <span>ASCII digits</span>,
   3708  optionally prefixed with a U+002D HYPHEN-MINUS character (-).</p>
   3709 
   3710  <p>A <span>valid integer</span> without a U+002D HYPHEN-MINUS (-) prefix represents the number
   3711  that is represented in base ten by that string of digits. A <span>valid integer</span>
   3712  <em>with</em> a U+002D HYPHEN-MINUS (-) prefix represents the number represented in base ten by
   3713  the string of digits that follows the U+002D HYPHEN-MINUS, subtracted from zero.</p>
   3714 
   3715  <div class="nodev">
   3716 
   3717  <p>The <dfn>rules for parsing integers</dfn> are as given in the following algorithm. When
   3718  invoked, the steps must be followed in the order given, aborting at the first step that returns a
   3719  value. This algorithm will return either an integer or an error.</p>
   3720 
   3721  <ol>
   3722 
   3723   <li><p>Let <var data-x="">input</var> be the string being parsed.</p></li>
   3724 
   3725   <li><p>Let <var data-x="">position</var> be a pointer into <var data-x="">input</var>, initially
   3726   pointing at the start of the string.</p></li>
   3727 
   3728   <li><p>Let <var data-x="">sign</var> have the value "positive".</p></li>
   3729 
   3730   <li><p><span>Skip whitespace</span>.</p></li>
   3731 
   3732   <li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, return an
   3733    error.</p></li>
   3734 
   3735   <li>
   3736 
   3737    <p>If the character indicated by <var data-x="">position</var> (the first character) is a U+002D
   3738    HYPHEN-MINUS character (-):</p>
   3739 
   3740    <ol>
   3741 
   3742     <li>Let <var data-x="">sign</var> be "negative".</li>
   3743 
   3744     <li>Advance <var data-x="">position</var> to the next character.</li>
   3745 
   3746     <li>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, return an
   3747     error.</li>
   3748 
   3749    </ol>
   3750 
   3751    <p>Otherwise, if the character indicated by <var data-x="">position</var> (the first character)
   3752    is a U+002B PLUS SIGN character (+):</p>
   3753 
   3754    <ol>
   3755 
   3756     <li>Advance <var data-x="">position</var> to the next character. (The "<code data-x="">+</code>"
   3757     is ignored, but it is not conforming.)</li>
   3758 
   3759     <li>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, return an
   3760     error.</li>
   3761 
   3762    </ol>
   3763 
   3764   </li>
   3765 
   3766   <li><p>If the character indicated by <var data-x="">position</var> is not an <span data-x="ASCII
   3767   digits">ASCII digit</span>, then return an error.</p></li>
   3768 
   3769   <!-- Ok. At this point we know we have a number. It might have
   3770   trailing garbage which we'll ignore, but it's a number, and we
   3771   won't return an error. -->
   3772 
   3773   <li><p><span>Collect a sequence of characters</span> that are <span>ASCII digits</span>, and
   3774   interpret the resulting sequence as a base-ten integer. Let <var data-x="">value</var> be that
   3775   integer.</p></li>
   3776 
   3777   <li><p>If <var data-x="">sign</var> is "positive", return <var
   3778   data-x="">value</var>, otherwise return the result of subtracting
   3779   <var data-x="">value</var> from zero.</p></li>
   3780 
   3781  </ol>
   3782 
   3783  </div>
   3784 
   3785 
   3786  <h5>Non-negative integers</h5>
   3787 
   3788  <p>A string is a <dfn>valid non-negative integer</dfn> if it consists of one or more <span>ASCII
   3789  digits</span>.</p>
   3790 
   3791  <p>A <span>valid non-negative integer</span> represents the number that is represented in base ten
   3792  by that string of digits.</p>
   3793 
   3794  <div class="nodev">
   3795 
   3796  <p>The <dfn>rules for parsing non-negative integers</dfn> are as given in the following algorithm.
   3797  When invoked, the steps must be followed in the order given, aborting at the first step that
   3798  returns a value. This algorithm will return either zero, a positive integer, or an error.</p>
   3799 
   3800  <ol>
   3801 
   3802   <li><p>Let <var data-x="">input</var> be the string being parsed.</p></li>
   3803 
   3804   <li><p>Let <var data-x="">value</var> be the result of parsing <var data-x="">input</var> using the
   3805   <span>rules for parsing integers</span>.</p></li>
   3806 
   3807   <li><p>If <var data-x="">value</var> is an error, return an error.</p></li>
   3808 
   3809   <li><p>If <var data-x="">value</var> is less than zero, return an error.</p></li>
   3810 
   3811   <li><p>Return <var data-x="">value</var>.</p></li>
   3812 
   3813  </ol>
   3814 
   3815  <!-- Implications: A leading + is ignored. A leading - is ignored if the value is zero. -->
   3816 
   3817  </div>
   3818 
   3819 
   3820  <h5>Floating-point numbers</h5>
   3821 
   3822  <p>A string is a <dfn>valid floating-point number</dfn> if it consists of:</p>
   3823 
   3824  <ol class="brief">
   3825 
   3826   <li>Optionally, a U+002D HYPHEN-MINUS character (-).</li>
   3827 
   3828   <li>One or both of the following, in the given order:
   3829 
   3830    <ol>
   3831 
   3832     <li>A series of one or more <span>ASCII digits</span>.</li>
   3833 
   3834     <li>
   3835 
   3836      <ol>
   3837 
   3838       <li>A single U+002E FULL STOP character (.).</li>
   3839 
   3840       <li>A series of one or more <span>ASCII digits</span>.</li>
   3841 
   3842      </ol>
   3843 
   3844     </li>
   3845 
   3846    </ol>
   3847 
   3848   </li>
   3849 
   3850   <li>Optionally:
   3851 
   3852    <ol>
   3853 
   3854     <li>Either a U+0065 LATIN SMALL LETTER E character (e) or a U+0045 LATIN CAPITAL LETTER E
   3855     character (E).</li>
   3856 
   3857     <li>Optionally, a U+002D HYPHEN-MINUS character (-) or U+002B PLUS SIGN character (+).</li>
   3858 
   3859     <li>A series of one or more <span>ASCII digits</span>.</li>
   3860 
   3861    </ol>
   3862 
   3863   </li>
   3864 
   3865  </ol>
   3866 
   3867  <p>A <span>valid floating-point number</span> represents the number obtained by multiplying the
   3868  significand by ten raised to the power of the exponent, where the significand is the first number,
   3869  interpreted as base ten (including the decimal point and the number after the decimal point, if
   3870  any, and interpreting the significand as a negative number if the whole string starts with a
   3871  U+002D HYPHEN-MINUS character (-) and the number is not zero), and where the exponent is the
   3872  number after the E, if any (interpreted as a negative number if there is a U+002D HYPHEN-MINUS
   3873  character (-) between the E and the number and the number is not zero, or else ignoring a U+002B
   3874  PLUS SIGN character (+) between the E and the number if there is one). If there is no E, then the
   3875  exponent is treated as zero.</p>
   3876 
   3877  <p class="note">The Infinity and Not-a-Number (NaN) values are not <span data-x="valid
   3878  floating-point number">valid floating-point numbers</span>.</p>
   3879 
   3880  <div class="nodev">
   3881 
   3882  <p>The <dfn data-x="best representation of the number as a floating-point number">best
   3883  representation of the number <var data-x="">n</var> as a floating-point number</dfn> is the string
   3884  obtained from applying the JavaScript operator ToString to <var data-x="">n</var>. The JavaScript
   3885  operator ToString is not uniquely determined. When there are multiple possible strings that could
   3886  be obtained from the JavaScript operator ToString for a particular value, the user agent must
   3887  always return the same string for that value (though it may differ from the value used by other
   3888  user agents).</p>
   3889 
   3890  <p>The <dfn>rules for parsing floating-point number values</dfn> are as given in the following
   3891  algorithm. This algorithm must be aborted at the first step that returns something. This algorithm
   3892  will return either a number or an error.</p>
   3893 
   3894  <ol>
   3895 
   3896   <li><p>Let <var data-x="">input</var> be the string being parsed.</p></li>
   3897 
   3898   <li><p>Let <var data-x="">position</var> be a pointer into <var data-x="">input</var>, initially
   3899   pointing at the start of the string.</p></li>
   3900 
   3901   <li><p>Let <var data-x="">value</var> have the value 1.</li>
   3902 
   3903   <li><p>Let <var data-x="">divisor</var> have the value 1.</p></li>
   3904 
   3905   <li><p>Let <var data-x="">exponent</var> have the value 1.</p></li>
   3906 
   3907   <li><p><span>Skip whitespace</span>.</p></li>
   3908 
   3909   <li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, return an
   3910   error.</p></li>
   3911 
   3912   <li>
   3913 
   3914    <p>If the character indicated by <var data-x="">position</var> is a U+002D HYPHEN-MINUS character
   3915    (-):</p>
   3916 
   3917    <ol>
   3918 
   3919     <li>Change <var data-x="">value</var> and <var data-x="">divisor</var> to &#x2212;1.</li>
   3920 
   3921     <li>Advance <var data-x="">position</var> to the next character.</li>
   3922 
   3923     <li>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, return an
   3924     error.</li>
   3925 
   3926    </ol>
   3927 
   3928    <p>Otherwise, if the character indicated by <var data-x="">position</var> (the first character)
   3929    is a U+002B PLUS SIGN character (+):</p>
   3930 
   3931    <ol>
   3932 
   3933     <li>Advance <var data-x="">position</var> to the next character. (The "<code data-x="">+</code>"
   3934     is ignored, but it is not conforming.)</li>
   3935 
   3936     <li>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, return an
   3937     error.</li>
   3938 
   3939    </ol>
   3940 
   3941   </li>
   3942 
   3943   <li><p>If the character indicated by <var data-x="">position</var> is a U+002E FULL STOP (.), and
   3944   that is not the last character in <var data-x="">input</var>, and the character after the
   3945   character indicated by <var data-x="">position</var> is an <span data-x="ASCII digits">ASCII
   3946   digit</span>, then set <var data-x="">value</var> to zero and jump to the step labeled
   3947   <i>fraction</i>.</p> <!-- we have to check there's a number so that ".e1" fails to parse but ".0"
   3948   does not -->
   3949 
   3950   <li><p>If the character indicated by <var data-x="">position</var> is not an <span data-x="ASCII
   3951   digits">ASCII digit</span>, then return an error.</p></li>
   3952 
   3953   <li><p><span>Collect a sequence of characters</span> that are <span>ASCII digits</span>, and
   3954   interpret the resulting sequence as a base-ten integer. Multiply <var data-x="">value</var> by
   3955   that integer.</p></li>
   3956 
   3957   <li>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, jump to the
   3958   step labeled <i>conversion</i>.</li>
   3959 
   3960   <li><p><i>Fraction</i>: If the character indicated by <var data-x="">position</var> is a U+002E
   3961   FULL STOP (.), run these substeps:</p>
   3962 
   3963    <ol>
   3964 
   3965     <li><p>Advance <var data-x="">position</var> to the next character.</p></li>
   3966 
   3967     <li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, or if the
   3968     character indicated by <var data-x="">position</var> is not an <span data-x="ASCII digits">ASCII
   3969     digit</span>, U+0065 LATIN SMALL LETTER E (e), or U+0045 LATIN CAPITAL LETTER E (E), then jump
   3970     to the step labeled <i>conversion</i>.</li>
   3971 
   3972     <li><p>If the character indicated by <var data-x="">position</var> is a U+0065 LATIN SMALL
   3973     LETTER E character (e) or a U+0045 LATIN CAPITAL LETTER E character (E), skip the remainder of
   3974     these substeps.</p>
   3975 
   3976     <li><p><i>Fraction loop</i>: Multiply <var data-x="">divisor</var> by ten.</p></li>
   3977 
   3978     <li>Add the value of the character indicated by <var data-x="">position</var>, interpreted as a
   3979     base-ten digit (0..9) and divided by <var data-x="">divisor</var>, to <var
   3980     data-x="">value</var>.</li>
   3981 
   3982     <li><p>Advance <var data-x="">position</var> to the next character.</p></li>
   3983 
   3984     <li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, then jump
   3985     to the step labeled <i>conversion</i>.</li>
   3986 
   3987     <li><p>If the character indicated by <var data-x="">position</var> is an <span data-x="ASCII
   3988     digits">ASCII digit</span>, jump back to the step labeled <i>fraction loop</i> in these
   3989     substeps.</p></li>
   3990 
   3991    </ol>
   3992 
   3993   </li>
   3994 
   3995   <li><p>If the character indicated by <var data-x="">position</var> is a U+0065 LATIN SMALL LETTER
   3996   E character (e) or a U+0045 LATIN CAPITAL LETTER E character (E), run these substeps:</p>
   3997 
   3998    <ol>
   3999 
   4000     <li><p>Advance <var data-x="">position</var> to the next character.</p></li>
   4001 
   4002     <li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, then jump
   4003     to the step labeled <i>conversion</i>.</li>
   4004 
   4005     <li>
   4006 
   4007      <p>If the character indicated by <var data-x="">position</var> is a U+002D HYPHEN-MINUS
   4008      character (-):</p>
   4009 
   4010      <ol>
   4011 
   4012       <li>Change <var data-x="">exponent</var> to &#x2212;1.</li>
   4013 
   4014       <li>Advance <var data-x="">position</var> to the next character.</li>
   4015 
   4016       <li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, then
   4017       jump to the step labeled <i>conversion</i>.</li>
   4018 
   4019      </ol>
   4020 
   4021      <p>Otherwise, if the character indicated by <var data-x="">position</var> is a U+002B PLUS SIGN
   4022      character (+):</p>
   4023 
   4024      <ol>
   4025 
   4026       <li>Advance <var data-x="">position</var> to the next character.</li>
   4027 
   4028       <li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, then
   4029       jump to the step labeled <i>conversion</i>.</li>
   4030 
   4031      </ol>
   4032 
   4033     </li>
   4034 
   4035     <li><p>If the character indicated by <var data-x="">position</var> is not an <span data-x="ASCII
   4036     digits">ASCII digit</span>, then jump to the step labeled <i>conversion</i>.</li>
   4037 
   4038     <li><p><span>Collect a sequence of characters</span> that are <span>ASCII digits</span>, and
   4039     interpret the resulting sequence as a base-ten integer. Multiply <var data-x="">exponent</var>
   4040     by that integer.</p></li>
   4041 
   4042     <li><p>Multiply <var data-x="">value</var> by ten raised to the <var data-x="">exponent</var>th
   4043     power.</p></li>
   4044 
   4045    </ol>
   4046 
   4047   </li>
   4048 
   4049   <li><p><i>Conversion</i>: Let <var data-x="">S</var> be the set of finite IEEE 754
   4050   double-precision floating-point values except &#x2212;0, but with two special values added: 2<sup
   4051   data-x="">1024</sup> and &#x2212;2<sup data-x="">1024</sup>.</p></li>
   4052 
   4053   <li><p>Let <var data-x="">rounded-value</var> be the number in <var data-x="">S</var> that is
   4054   closest to <var data-x="">value</var>, selecting the number with an even significand if there are
   4055   two equally close values. (The two special values 2<sup data-x="">1024</sup> and &#x2212;2<sup
   4056   data-x="">1024</sup> are considered to have even significands for this purpose.)</p></li>
   4057 
   4058   <li><p>If <var data-x="">rounded-value</var> is 2<sup data-x="">1024</sup> or &#x2212;2<sup
   4059   data-x="">1024</sup>, return an error.</p></li>
   4060 
   4061   <li><p>Return <var data-x="">rounded-value</var>.</p></li>
   4062 
   4063  </ol>
   4064 
   4065  </div>
   4066 
   4067 
   4068 <div class="nodev">
   4069  <h5 id="percentages-and-dimensions">Percentages and lengths</h5>
   4070 
   4071  <p>The <dfn>rules for parsing dimension values</dfn> are as given in the following algorithm. When
   4072  invoked, the steps must be followed in the order given, aborting at the first step that returns a
   4073  value. This algorithm will return either a number greater than or equal to 1.0, or an error; if a
   4074  number is returned, then it is further categorised as either a percentage or a length.</p>
   4075 
   4076  <ol>
   4077 
   4078   <li><p>Let <var data-x="">input</var> be the string being parsed.</p></li>
   4079 
   4080   <li><p>Let <var data-x="">position</var> be a pointer into <var data-x="">input</var>, initially
   4081   pointing at the start of the string.</p></li>
   4082 
   4083   <li><p><span>Skip whitespace</span>.</p></li>
   4084 
   4085   <li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, return an
   4086   error.</p></li>
   4087 
   4088   <li><p>If the character indicated by <var data-x="">position</var> is a U+002B PLUS SIGN character
   4089   (+), advance <var data-x="">position</var> to the next character.</li>
   4090 
   4091   <li><p><span>Collect a sequence of characters</span> that are U+0030 DIGIT ZERO (0) characters,
   4092   and discard them.</p></li>
   4093 
   4094   <li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, return an
   4095   error.</p></li>
   4096 
   4097   <li><p>If the character indicated by <var data-x="">position</var> is not one of U+0031 DIGIT ONE
   4098   (1) to U+0039 DIGIT NINE (9), then return an error.</p></li>
   4099 
   4100   <!-- Ok. At this point we know we have a number. It might have trailing garbage which we'll
   4101   ignore, but it's a number, and we won't return an error. -->
   4102 
   4103   <li><p><span>Collect a sequence of characters</span> that are <span>ASCII digits</span>, and
   4104   interpret the resulting sequence as a base-ten integer. Let <var data-x="">value</var> be that
   4105   number.</li>
   4106 
   4107   <li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, return <var
   4108   data-x="">value</var> as a length.</p></li>
   4109 
   4110   <li>
   4111 
   4112    <p>If the character indicated by <var data-x="">position</var> is a U+002E FULL STOP character
   4113    (.):</p>
   4114 
   4115    <ol>
   4116 
   4117     <li><p>Advance <var data-x="">position</var> to the next character.</p></li>
   4118 
   4119     <li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, or if the
   4120     character indicated by <var data-x="">position</var> is not an <span data-x="ASCII digits">ASCII
   4121     digit</span>, then return <var data-x="">value</var> as a length.</li>
   4122 
   4123     <li><p>Let <var data-x="">divisor</var> have the value 1.</p></li>
   4124 
   4125     <li><p><i>Fraction loop</i>: Multiply <var data-x="">divisor</var> by ten.</p></li>
   4126 
   4127     <li><p>Add the value of the character indicated by <var data-x="">position</var>, interpreted as a
   4128     base-ten digit (0..9) and divided by <var data-x="">divisor</var>, to <var
   4129     data-x="">value</var>.</p></li>
   4130 
   4131     <li><p>Advance <var data-x="">position</var> to the next character.</p></li>
   4132 
   4133     <li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, then
   4134     return <var data-x="">value</var> as a length.</li>
   4135 
   4136     <li><p>If the character indicated by <var data-x="">position</var> is an <span data-x="ASCII
   4137     digits">ASCII digit</span>, return to the step labeled <i>fraction loop</i> in these
   4138     substeps.</p></li>
   4139 
   4140    </ol>
   4141 
   4142   </li>
   4143 
   4144   <li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, return <var
   4145   data-x="">value</var> as a length.</p></li>
   4146 
   4147   <li><p>If the character indicated by <var data-x="">position</var> is a U+0025 PERCENT SIGN
   4148   character (%), return <var data-x="">value</var> as a percentage.</p></li>
   4149 
   4150   <li><p>Return <var data-x="">value</var> as a length.</p></li>
   4151 
   4152  </ol>
   4153 
   4154  </div>
   4155 
   4156 
   4157  <h5>Lists of integers</h5>
   4158 
   4159  <p>A <dfn>valid list of integers</dfn> is a number of <span data-x="valid integer">valid
   4160  integers</span> separated by U+002C COMMA characters, with no other characters (e.g. no <span
   4161  data-x="space character">space characters</span>). In addition, there might be restrictions on the
   4162  number of integers that can be given, or on the range of values allowed.</p>
   4163 
   4164  <div class="nodev">
   4165 
   4166  <p>The <dfn>rules for parsing a list of integers</dfn> are as follows:</p>
   4167 
   4168  <ol>
   4169 
   4170   <li><p>Let <var data-x="">input</var> be the string being parsed.</p></li>
   4171 
   4172   <li><p>Let <var data-x="">position</var> be a pointer into <var data-x="">input</var>, initially
   4173   pointing at the start of the string.</p></li>
   4174 
   4175   <li><p>Let <var data-x="">numbers</var> be an initially empty list of integers. This list will be
   4176   the result of this algorithm.</p></li>
   4177 
   4178   <li><p>If there is a character in the string <var data-x="">input</var> at position <var
   4179   data-x="">position</var>, and it is either a U+0020 SPACE, U+002C COMMA, or U+003B SEMICOLON
   4180   character, then advance <var data-x="">position</var> to the next character in <var
   4181   data-x="">input</var>, or to beyond the end of the string if there are no more
   4182   characters.</p></li>
   4183 
   4184   <li><p>If <var data-x="">position</var> points to beyond the end of <var data-x="">input</var>,
   4185   return <var data-x="">numbers</var> and abort.</p></li>
   4186 
   4187   <li><p>If the character in the string <var data-x="">input</var> at position <var
   4188   data-x="">position</var> is a U+0020 SPACE, U+002C COMMA, or U+003B SEMICOLON character, then
   4189   return to step 4.</li>
   4190 
   4191   <li><p>Let <var data-x="">negated</var> be false.</p></li> <li><p>Let <var data-x="">value</var> be
   4192   0.</p></li>
   4193 
   4194   <li><p>Let <var data-x="">started</var> be false. This variable is set to true when the parser
   4195   sees a number or a U+002D HYPHEN-MINUS character (-).</p></li>
   4196 
   4197   <li><p>Let <var data-x="">got number</var> be false. This variable is set to true when the parser
   4198   sees a number.</p></li>
   4199 
   4200   <li><p>Let <var data-x="">finished</var> be false. This variable is set to true to switch the
   4201   parser into a mode where it ignores characters until the next separator.</p></li>
   4202 
   4203   <li><p>Let <var data-x="">bogus</var> be false.</p></li>
   4204 
   4205   <li><p><i>Parser</i>: If the character in the string <var data-x="">input</var> at position <var
   4206   data-x="">position</var> is:</p>
   4207 
   4208    <dl class="switch">
   4209 
   4210     <dt>A U+002D HYPHEN-MINUS character</dt>
   4211 
   4212     <dd>
   4213 
   4214      <p>Follow these substeps:</p>
   4215 
   4216      <ol>
   4217 
   4218       <li>If <var data-x="">got number</var> is true, let <var data-x="">finished</var> be true.</li>
   4219 
   4220       <li>If <var data-x="">finished</var> is true, skip to the next step in the overall set of
   4221       steps.</li>
   4222 
   4223       <li>If <var data-x="">started</var> is true, let <var data-x="">negated</var> be false.</li>
   4224 
   4225       <li>Otherwise, if <var data-x="">started</var> is false and if <var data-x="">bogus</var> is
   4226       false, let <var data-x="">negated</var> be true.</li>
   4227 
   4228       <li>Let <var data-x="">started</var> be true.</li>
   4229 
   4230      </ol>
   4231 
   4232     </dd>
   4233 
   4234     <dt>An <span data-x="ASCII digits">ASCII digit</span></dt>
   4235 
   4236     <dd>
   4237 
   4238      <p>Follow these substeps:</p>
   4239 
   4240      <ol>
   4241 
   4242       <li>If <var data-x="">finished</var> is true, skip to the next step in the overall set of
   4243       steps.</li>
   4244 
   4245       <li>Multiply <var data-x="">value</var> by ten.</li>
   4246 
   4247       <li>Add the value of the digit, interpreted in base ten, to <var data-x="">value</var>.</li>
   4248 
   4249       <li>Let <var data-x="">started</var> be true.</li>
   4250 
   4251       <li>Let <var data-x="">got number</var> be true.</li>
   4252 
   4253      </ol>
   4254 
   4255     </dd>
   4256 
   4257 
   4258     <dt>A U+0020 SPACE character</dt>
   4259     <dt>A U+002C COMMA character</dt>
   4260     <dt>A U+003B SEMICOLON character</dt>
   4261 
   4262     <dd>
   4263 
   4264      <p>Follow these substeps:</p>
   4265 
   4266      <ol>
   4267 
   4268       <li>If <var data-x="">got number</var> is false, return the <var data-x="">numbers</var> list
   4269       and abort. This happens if an entry in the list has no digits, as in "<code
   4270       data-x="">1,2,x,4</code>".</li>
   4271 
   4272       <li>If <var data-x="">negated</var> is true, then negate <var data-x="">value</var>.</li>
   4273 
   4274       <li>Append <var data-x="">value</var> to the <var data-x="">numbers</var> list.</li>
   4275 
   4276       <li>Jump to step 4 in the overall set of steps.</li>
   4277 
   4278      </ol>
   4279 
   4280     </dd>
   4281 
   4282 
   4283     <!-- <dt>A U+002E FULL STOP character</dt> -->
   4284     <dt>A character in the range U+0001 to U+001F, <!-- space --> U+0021 to U+002B, <!-- comma --> U+002D to U+002F, <!-- digits --> U+003A, <!-- semicolon --> U+003C to U+0040, <!-- A-Z --> U+005B to U+0060, <!-- a-z --> U+007B to U+007F
   4285        (i.e. any other non-alphabetic ASCII character)</dt>
   4286 
   4287 <!--
   4288 Test: http://www.hixie.ch/tests/adhoc/html/flow/image-maps/004-demo.html
   4289 IE6 on Wine treats the following characters like this also: U+1-U+1f, U+21-U+2b, U+2d-U+2f, U+3a,
   4290 U+3c-U+40, U+5b-U+60, U+7b-U+82, U+84-U+89, U+8b, U+8d, U+8f-U+99, U+9b, U+9d, U+a0-U+bf, U+d7,
   4291 U+f7, U+1f6-U+1f9, U+218-U+24f, U+2a9-U+385, U+387, U+38b, U+38d, U+3a2, U+3cf, U+3d7-U+3d9, U+3db,
   4292 U+3dd, U+3df, U+3e1, U+3f4-U+400, U+40d, U+450, U+45d, U+482-U+48f, U+4c5-U+4c6, U+4c9-U+4ca,
   4293 U+4cd-U+4cf, U+4ec-U+4ed, U+4f6-U+4f7, U+4fa-U+530, U+557-U+560, U+588-U+5cf, U+5eb-U+5ef,
   4294 U+5f3-U+620, U+63b-U+640, U+64b-U+670, U+6b8-U+6b9, U+6bf, U+6cf, U+6d4, U+6d6-U+904, U+93a-U+957,
   4295 U+962-U+984, U+98d-U+98e, U+991-U+992, U+9a9, U+9b1, U+9b3-U+9b5, U+9ba-U+9db, U+9de, U+9e2-U+9ef,
   4296 U+9f2-U+a04, U+a0b-U+a0e, U+a11-U+a12, U+a29, U+a31, U+a34, U+a37, U+a3a-U+a58, U+a5d, U+a5f-U+a84,
   4297 U+a8c, U+a8e, U+a92, U+aa9, U+ab1, U+ab4, U+aba-U+adf, U+ae1-U+b04, U+b0d-U+b0e, U+b11-U+b12,
   4298 U+b29, U+b31, U+b34-U+b35, U+b3a-U+b5b, U+b5e, U+b62-U+b84, U+b8b-U+b8d, U+b91, U+b96-U+b98, U+b9b,
   4299 U+b9d, U+ba0-U+ba2, U+ba5-U+ba7, U+bab-U+bad, U+bb6, U+bba-U+c04, U+c0d, U+c11, U+c29, U+c34,
   4300 U+c3a-U+c5f, U+c62-U+c84, U+c8d, U+c91, U+ca9, U+cb4, U+cba-U+cdd, U+cdf, U+ce2-U+d04, U+d0d,
   4301 U+d11, U+d29, U+d3a-U+d5f, U+d62-U+e00, U+e2f, U+e31, U+e34-U+e3f, U+e46-U+e80, U+e83, U+e85-U+e86,
   4302 U+e89, U+e8b-U+e8c, U+e8e-U+e93, U+e98, U+ea0, U+ea4, U+ea6, U+ea8-U+ea9, U+eac, U+eaf-U+edb,
   4303 U+ede-U+109f, U+10c6-U+10cf, U+10f7-U+10ff, U+115a-U+115e, U+11a3-U+11a7, U+11fa-U+1dff,
   4304 U+1e9b-U+1e9f, U+1efa-U+1eff, U+1f16-U+1f17, U+1f1e-U+1f1f, U+1f46-U+1f47, U+1f4e-U+1f4f, U+1f58,
   4305 U+1f5a, U+1f5c, U+1f5e, U+1f7e-U+1f7f, U+1fb5, U+1fbd-U+1fc1, U+1fc5, U+1fcd-U+1fcf, U+1fd4-U+1fd5,
   4306 U+1fdc-U+1fdf, U+1fed-U+1ff1, U+1ff5, U+1ffd-U+249b, U+24ea-U+3004, U+3006-U+3040, U+3095-U+309a,
   4307 U+309f-U+30a0, U+30fb, U+30ff-U+3104, U+312d-U+3130, U+318f-U+4dff, U+9fa6-U+abff, U+d7a4-U+d7ff,
   4308 U+e000-U+f8ff, U+fa2e-U+faff, U+fb07-U+fb12, U+fb18-U+fb1e, U+fb37, U+fb3d, U+fb3f, U+fb42, U+fb45,
   4309 U+fbb2-U+fbd2, U+fbe9, U+fce1, U+fd3e-U+fd4f, U+fd90-U+fd91, U+fdc8-U+fdef, U+fdfc-U+fe7f,
   4310 U+fefd-U+ff20, U+ff3b-U+ff40, U+ff5b-U+ff65, U+ffa0, U+ffbf-U+ffc1, U+ffc8-U+ffc9, U+ffd0-U+ffd1,
   4311 U+ffd8-U+ffd9, U+ffdd-U+ffff
   4312 IE7 on Win2003 treats the following characters like this also instead: U+1-U+1f, U+21-U+2b,
   4313 U+2d-U+2f, U+3a, U+3c-U+40, U+5b-U+60, U+7b-U+82, U+84-U+89, U+8b, U+8d, U+8f-U+99, U+9b, U+9d,
   4314 U+a0-U+a9, U+ab-U+b4, U+b6-U+b9, U+bb-U+bf, U+d7, U+f7, U+220-U+221, U+234-U+24f, U+2ae-U+2af,
   4315 U+2b9-U+2ba, U+2c2-U+2df, U+2e5-U+2ed, U+2ef-U+344, U+346-U+379, U+37b-U+385, U+387, U+38b, U+38d,
   4316 U+3a2, U+3cf, U+3d8-U+3d9, U+3f4-U+3ff, U+482-U+48b, U+4c5-U+4c6, U+4c9-U+4ca, U+4cd-U+4cf,
   4317 U+4f6-U+4f7, U+4fa-U+530, U+557-U+558, U+55a-U+560, U+588-U+5cf, U+5eb-U+5ef, U+5f3-U+620,
   4318 U+63b-U+640, U+656-U+66f, U+6d4, U+6dd-U+6e0, U+6e9-U+6ec, U+6ee-U+6f9, U+6fd-U+70f, U+72d-U+72f,
   4319 U+740-U+77f, U+7b1-U+900, U+904, U+93a-U+93c, U+94d - U+94f, U+951-U+957, U+964-U+980, U+984,
   4320 U+98d-U+98e, U+991-U+992, U+9a9, U+9b1, U+9b3-U+9b5, U+9ba-U+9bd, U+9c5-U+9c6, U+9c9-U+9ca,
   4321 U+9cd-U+9d6, U+9d8-U+9db, U+9de, U+9e4-U+9ef, U+9f2-U+a01, U+a03-U+a04, U+a0b-U+a0e, U+a11-U+a12,
   4322 U+a29, U+a31, U+a34, U+a37, U+a3a-U+a3d, U+a43-U+a46, U+a49-U+a4a, U+a4d-U+a58, U+a5d, U+a5f-U+a6f,
   4323 U+a75-U+a80, U+a84, U+a8c, U+a8e, U+a92, U+aa9, U+ab1, U+ab4, U+aba-U+abc, U+ac6, U+aca,
   4324 U+acd-U+acf, U+ad1-U+adf, U+ae1-U+b00, U+b04, U+b0d-U+b0e, U+b11-U+b12, U+b29, U+b31, U+b34-U+b35,
   4325 U+b3a-U+b3c, U+b44-U+b46, U+b49 - U+b4a, U+b4d-U+b55, U+b58-U+b5b, U+b5e, U+b62-U+b81, U+b84,
   4326 U+b8b-U+b8d, U+b91, U+b96-U+b98, U+b9b, U+b9d, U+ba0 - U+ba2, U+ba5-U+ba7, U+bab-U+bad, U+bb6,
   4327 U+bba-U+bbd, U+bc3-U+bc5, U+bc9, U+bcd-U+bd6, U+bd8-U+c00, U+c04, U+c0d, U+c11, U+c29, U+c34,
   4328 U+c3a-U+c3d, U+c45, U+c49, U+c4d-U+c54, U+c57-U+c5f, U+c62-U+c81, U+c84, U+c8d, U+c91, U+ca9,
   4329 U+cb4, U+cba-U+cbd, U+cc5, U+cc9, U+ccd-U+cd4, U+cd7-U+cdd, U+cdf, U+ce2-U+d01, U+d04, U+d0d,
   4330 U+d11, U+d29, U+d3a-U+d3d, U+d44-U+d45, U+d49, U+d4d-U+d56, U+d58-U+d5f, U+d62-U+d81, U+d84,
   4331 U+d97-U+d99, U+db2, U+dbc, U+dbe - U+dbf, U+dc7-U+dce, U+dd5, U+dd7, U+de0-U+df1, U+df4-U+e00,
   4332 U+e3b-U+e3f, U+e4f-U+e80, U+e83, U+e85-U+e86, U+e89, U+e8b-U+e8c, U+e8e-U+e93, U+e98, U+ea0, U+ea4,
   4333 U+ea6, U+ea8-U+ea9, U+eac, U+eba, U+ebe-U+ebf, U+ec5-U+ecc, U+ece-U+edb, U+ede-U+eff, U+f01-U+f3f,
   4334 U+f48, U+f6b-U+f70, U+f82-U+f87, U+f8c-U+f8f, U+f98, U+fbd-U+fff, U+1022, U+1028, U+102b,
   4335 U+1033-U+1035, U+1037, U+1039-U+104f, U+105a-U+109f, U+10c6-U+10cf, U+10f7-U+10ff, U+115a - U+115e,
   4336 U+11a3-U+11a7, U+11fa-U+11ff, U+1207, U+1247, U+1249, U+124e-U+124f, U+1257, U+1259, U+125e-U+125f,
   4337 U+1287, U+1289, U+128e-U+128f, U+12af, U+12b1, U+12b6-U+12b7, U+12bf, U+12c1, U+12c6-U+12c7,
   4338 U+12cf, U+12d7, U+12ef, U+130f, U+1311, U+1316-U+1317, U+131f, U+1347, U+135b-U+139f,
   4339 U+13f5-U+1400, U+166d-U+166e, U+1677-U+1680, U+169b - U+169f, U+16eb-U+177f, U+17c9-U+181f, U+1843,
   4340 U+1878-U+187f, U+18aa-U+1dff, U+1e9c-U+1e9f, U+1efa-U+1eff, U+1f16-U+1f17, U+1f1e-U+1f1f,
   4341 U+1f46-U+1f47, U+1f4e-U+1f4f, U+1f58, U+1f5a, U+1f5c, U+1f5e, U+1f7e-U+1f7f, U+1fb5, U+1fbd,
   4342 U+1fbf-U+1fc1, U+1fc5, U+1fcd-U+1fcf, U+1fd4-U+1fd5, U+1fdc-U+1fdf, U+1fed-U+1ff1, U+1ff5,
   4343 U+1ffd-U+207e, U+2080-U+2101, U+2103-U+2106, U+2108-U+2109, U+2114, U+2116-U+2118, U+211e-U+2123,
   4344 U+2125, U+2127, U+2129, U+212e, U+2132, U+213a-U+215f, U+2184-U+3005, U+3008-U+3020, U+302a-U+3037,
   4345 U+303b-U+3104, U+312d-U+3130, U+318f - U+319f, U+31b8-U+33ff, U+4db6-U+4dff, U+9fa6-U+9fff,
   4346 U+a48d-U+abff, U+d7a4-U+d7ff, U+e000-U+f8ff, U+fa2e-U+faff, U+fb07-U+fb12, U+fb18-U+fb1c, U+fb1e,
   4347 U+fb29, U+fb37, U+fb3d, U+fb3f, U+fb42, U+fb45, U+fbb2-U+fbd2, U+fd3e-U+fd4f, U+fd90-U+fd91,
   4348 U+fdc8-U+fdef, U+fdfc-U+fe6f, U+fe73, U+fe75, U+fefd-U+ff20, U+ff3b-U+ff40, U+ff5b-U+ff9f,
   4349 U+ffbf-U+ffc1, U+ffc8-U+ffc9, U+ffd0-U+ffd1, U+ffd8-U+ffd9, U+ffdd-U+ffff
   4350 -->
   4351 
   4352     <dd>
   4353 
   4354      <p>Follow these substeps:</p>
   4355 
   4356      <ol>
   4357 
   4358       <li>If <var data-x="">got number</var> is true, let <var data-x="">finished</var> be true.</li>
   4359 
   4360       <li>If <var data-x="">finished</var> is true, skip to the next step in the overall set of
   4361       steps.</li>
   4362 
   4363       <li>Let <var data-x="">negated</var> be false.</li>
   4364 
   4365      </ol>
   4366 
   4367     </dd>
   4368 
   4369 
   4370     <dt>Any other character</dt>
   4371     <!-- alphabetic a-z A-Z, and non-ASCII -->
   4372 
   4373     <dd>
   4374 
   4375      <p>Follow these substeps:</p>
   4376 
   4377      <ol>
   4378 
   4379       <li>If <var data-x="">finished</var> is true, skip to the next step in the overall set of
   4380       steps.</li>
   4381 
   4382       <li>Let <var data-x="">negated</var> be false.</li>
   4383 
   4384       <li>Let <var data-x="">bogus</var> be true.</li>
   4385 
   4386       <li>If <var data-x="">started</var> is true, then return the <var data-x="">numbers</var> list,
   4387       and abort. (The value in <var data-x="">value</var> is not appended to the list first; it is
   4388       dropped.)</li>
   4389 
   4390      </ol>
   4391 
   4392     </dd>
   4393 
   4394    </dl>
   4395 
   4396   </li>
   4397 
   4398   <li><p>Advance <var data-x="">position</var> to the next character in <var data-x="">input</var>,
   4399   or to beyond the end of the string if there are no more characters.</p></li>
   4400 
   4401   <li><p>If <var data-x="">position</var> points to a character (and not to beyond the end of <var
   4402   data-x="">input</var>), jump to the big <i>Parser</i> step above.</p></li>
   4403 
   4404   <li><p>If <var data-x="">negated</var> is true, then negate <var data-x="">value</var>.</li>
   4405 
   4406   <li><p>If <var data-x="">got number</var> is true, then append <var data-x="">value</var> to the
   4407   <var data-x="">numbers</var> list.</li>
   4408 
   4409   <li><p>Return the <var data-x="">numbers</var> list and abort.</p></li>
   4410 
   4411  </ol>
   4412 
   4413  </div>
   4414 
   4415 
   4416  <div class="nodev">
   4417 
   4418  <h5>Lists of dimensions</h5>
   4419 
   4420  <!-- no definition of a type since no conforming feature uses this syntax (it's only used in
   4421  cols="" and rows="" on <frameset> elements) -->
   4422 
   4423  <p>The <dfn>rules for parsing a list of dimensions</dfn> are as follows. These rules return a list
   4424  of zero or more pairs consisting of a number and a unit, the unit being one of <i>percentage</i>,
   4425  <i>relative</i>, and <i>absolute</i>.</p>
   4426 
   4427  <ol>
   4428 
   4429   <li><p>Let <var data-x="">raw input</var> be the string being parsed.</p></li>
   4430 
   4431   <li><p>If the last character in <var data-x="">raw input</var> is a U+002C COMMA character (,),
   4432   then remove that character from <var data-x="">raw input</var>.</p></li>
   4433 
   4434   <li><p><span data-x="split a string on commas">Split the string <var data-x="">raw input</var> on
   4435   commas</span>. Let <var data-x="">raw tokens</var> be the resulting list of tokens.</p></li>
   4436 
   4437   <li><p>Let <var data-x="">result</var> be an empty list of number/unit pairs.</p></li>
   4438 
   4439   <li>
   4440 
   4441    <p>For each token in <var data-x="">raw tokens</var>, run the following substeps:</p>
   4442 
   4443    <ol>
   4444 
   4445     <li><p>Let <var data-x="">input</var> be the token.</p></li>
   4446 
   4447     <li><p>Let <var data-x="">position</var> be a pointer into <var data-x="">input</var>,
   4448     initially pointing at the start of the string.</p></li>
   4449 
   4450     <li><p>Let <var data-x="">value</var> be the number 0.</p></li>
   4451 
   4452     <li><p>Let <var data-x="">unit</var> be <i>absolute</i>.</p></li>
   4453 
   4454     <li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, set <var
   4455     data-x="">unit</var> to <i>relative</i> and jump to the last substep.</p></li>
   4456 
   4457     <li><p>If the character at <var data-x="">position</var> is an <span data-x="ASCII
   4458     digits">ASCII digit</span>, <span>collect a sequence of characters</span> that are <span>ASCII
   4459     digits</span>, interpret the resulting sequence as an integer in base ten, and increment <var
   4460     data-x="">value</var> by that integer.</p></li>
   4461 
   4462     <li>
   4463 
   4464      <p>If the character at <var data-x="">position</var> is a U+002E FULL STOP character (.), run
   4465      these substeps:</p>
   4466 
   4467      <ol>
   4468 
   4469       <li><p>Advance <var data-x="">position</var> to the next character, then <span>collect a
   4470       sequence of characters</span> consisting of <span data-x="space character">space
   4471       characters</span> and <span>ASCII digits</span>. Let <var data-x="">s</var> be the resulting sequence.</p></li>
   4472 
   4473       <li><p>Remove all <span data-x="space character">space characters</span> in <var
   4474       data-x="">s</var>.</p></li>
   4475 
   4476       <li>
   4477 
   4478        <p>If <var data-x="">s</var> is not the empty string, run these subsubsteps:</p>
   4479 
   4480        <ol>
   4481 
   4482         <li><p>Let <var data-x="">length</var> be the number of characters in <var
   4483         data-x="">s</var> (after the spaces were removed).</p></li>
   4484 
   4485         <li><p>Let <var data-x="">fraction</var> be the result of interpreting <var
   4486         data-x="">s</var> as a base-ten integer, and then dividing that number by <span
   4487         data-x="">10<sup data-x=""><var data-x="">length</var></sup></span>.</li>
   4488 
   4489         <li><p>Increment <var data-x="">value</var> by <var data-x="">fraction</var>.</p></li>
   4490 
   4491        </ol>
   4492 
   4493       </li>
   4494 
   4495      </ol>
   4496 
   4497     </li>
   4498 
   4499     <li><p><span>Skip whitespace</span>.</p></li>
   4500 
   4501     <li>
   4502 
   4503      <p>If the character at <var data-x="">position</var> is a U+0025 PERCENT SIGN character (%),
   4504      then set <var data-x="">unit</var> to <i>percentage</i>.</p>
   4505 
   4506      <p>Otherwise, if the character at <var data-x="">position</var> is a U+002A ASTERISK character
   4507      (*), then set <var data-x="">unit</var> to <i>relative</i>.</p>
   4508 
   4509     </li>
   4510 
   4511     <!-- the remaining characters in /input/ are ignored -->
   4512 
   4513     <li><p>Add an entry to <var data-x="">result</var> consisting of the number given by <var
   4514     data-x="">value</var> and the unit given by <var data-x="">unit</var>.</p></li>
   4515 
   4516    </ol>
   4517 
   4518   </li>
   4519 
   4520   <li><p>Return the list <var data-x="">result</var>.</p></li>
   4521 
   4522  </ol>
   4523 
   4524  </div>
   4525 
   4526 
   4527  <h4>Dates and times</h4>
   4528 
   4529  <p>In the algorithms below, the <dfn>number of days in month <var data-x="">month</var> of year
   4530  <var data-x="">year</var></dfn> is: <em>31</em> if <var data-x="">month</var> is 1, 3, 5, 7, 8,
   4531  10, or 12; <em>30</em> if <var data-x="">month</var> is 4, 6, 9, or 11; <em>29</em> if <var
   4532  data-x="">month</var> is 2 and <var data-x="">year</var> is a number divisible by 400, or if <var
   4533  data-x="">year</var> is a number divisible by 4 but not by 100; and <em>28</em> otherwise. This
   4534  takes into account leap years in the Gregorian calendar. <a
   4535  href="#refsGREGORIAN">[GREGORIAN]</a></p>
   4536 
   4537  <p>When <span>ASCII digits</span> are used in the date and time syntaxes defined in this section,
   4538  they express numbers in base ten.</p>
   4539 
   4540  <div class="nodev">
   4541 
   4542  <p class="note">While the formats described here are intended to be subsets of the corresponding
   4543  ISO8601 formats, this specification defines parsing rules in much more detail than ISO8601.
   4544  Implementors are therefore encouraged to carefully examine any date parsing libraries before using
   4545  them to implement the parsing rules described below; ISO8601 libraries might not parse dates and
   4546  times in exactly the same manner. <a href="#refsISO8601">[ISO8601]</a></p>
   4547 
   4548  </div>
   4549 
   4550  <p>Where this specification refers to the <dfn>proleptic Gregorian calendar</dfn>, it means the
   4551  modern Gregorian calendar, extrapolated backwards to year 1. A date in the <span>proleptic
   4552  Gregorian calendar</span>, sometimes explicitly referred to as a <dfn>proleptic-Gregorian
   4553  date</dfn>, is one that is described using that calendar even if that calendar was not in use at
   4554  the time (or place) in question. <a href="#refsGREGORIAN">[GREGORIAN]</a></p>
   4555 
   4556  <p class="note">The use of the Gregorian calendar as the wire format in this specification is an
   4557  arbitrary choice resulting from the cultural biases of those involved in the decision. See also
   4558  the section discussing <a href="#input-author-notes">date, time, and number formats</a> in forms
   4559  <span class="nodev">(for authors), <a href="#input-impl-notes">implementation notes regarding
   4560  localization of form controls</a>,</span> and the <code>time</code> element.</p>
   4561 
   4562 
   4563  <h5>Months</h5>
   4564 
   4565  <p>A <dfn data-x="concept-month">month</dfn> consists of a specific <span>proleptic-Gregorian
   4566  date</span> with no time-zone information and no date information beyond a year and a month. <a
   4567  href="#refsGREGORIAN">[GREGORIAN]</a></p>
   4568 
   4569  <p>A string is a <dfn>valid month string</dfn> representing a year <var data-x="">year</var> and
   4570  month <var data-x="">month</var> if it consists of the following components in the given order:</p>
   4571 
   4572  <ol>
   4573 
   4574   <li>Four or more <span>ASCII digits</span>, representing <var data-x="">year</var>, where <var
   4575   data-x="">year</var>&nbsp;&gt;&nbsp;0</li>
   4576 
   4577   <li>A U+002D HYPHEN-MINUS character (-)</li>
   4578 
   4579   <li>Two <span>ASCII digits</span>, representing the month <var data-x="">month</var>, in the range
   4580   1&nbsp;&le;&nbsp;<var data-x="">month</var>&nbsp;&le;&nbsp;12</li>
   4581 
   4582  </ol>
   4583 
   4584  <div class="nodev">
   4585 
   4586  <p>The rules to <dfn>parse a month string</dfn> are as follows. This will return either a year and
   4587  month, or nothing. If at any point the algorithm says that it "fails", this means that it is
   4588  aborted at that point and returns nothing.</p>
   4589 
   4590  <ol>
   4591 
   4592   <li><p>Let <var data-x="">input</var> be the string being parsed.</p></li>
   4593 
   4594   <li><p>Let <var data-x="">position</var> be a pointer into <var data-x="">input</var>, initially
   4595   pointing at the start of the string.</p></li>
   4596 
   4597   <li><p><span>Parse a month component</span> to obtain <var data-x="">year</var> and <var
   4598   data-x="">month</var>. If this returns nothing, then fail.</p>
   4599 
   4600   <li><p>If <var data-x="">position</var> is <em>not</em> beyond the
   4601   end of <var data-x="">input</var>, then fail.</p></li>
   4602 
   4603   <li><p>Return <var data-x="">year</var> and <var data-x="">month</var>.</p></li>
   4604 
   4605  </ol>
   4606 
   4607  <p>The rules to <dfn>parse a month component</dfn>, given an <var data-x="">input</var> string and
   4608  a <var data-x="">position</var>, are as follows. This will return either a year and a month, or
   4609  nothing. If at any point the algorithm says that it "fails", this means that it is aborted at that
   4610  point and returns nothing.</p>
   4611 
   4612  <ol>
   4613 
   4614   <li><p><span>Collect a sequence of characters</span> that are <span>ASCII digits</span>. If the
   4615   collected sequence is not at least four characters long, then fail. Otherwise, interpret the
   4616   resulting sequence as a base-ten integer. Let that number be the <var
   4617   data-x="">year</var>.</p></li>
   4618 
   4619   <li><p>If <var data-x="">year</var> is not a number greater than zero, then fail.</p></li>
   4620 
   4621   <li><p>If <var data-x="">position</var> is beyond the end of <var data-x="">input</var> or if the
   4622   character at <var data-x="">position</var> is not a U+002D HYPHEN-MINUS character, then fail.
   4623   Otherwise, move <var data-x="">position</var> forwards one character.</p></li>
   4624 
   4625   <li><p><span>Collect a sequence of characters</span> that are <span>ASCII digits</span>. If the
   4626   collected sequence is not exactly two characters long, then fail. Otherwise, interpret the
   4627   resulting sequence as a base-ten integer. Let that number be the <var
   4628   data-x="">month</var>.</p></li>
   4629 
   4630   <li><p>If <var data-x="">month</var> is not a number in the range 1&nbsp;&le;&nbsp;<var
   4631   data-x="">month</var>&nbsp;&le;&nbsp;12, then fail.</p></li>
   4632 
   4633   <li><p>Return <var data-x="">year</var> and <var data-x="">month</var>.</p></li>
   4634 
   4635  </ol>
   4636 
   4637  </div>
   4638 
   4639 
   4640  <h5>Dates</h5>
   4641 
   4642  <p>A <dfn data-x="concept-date">date</dfn> consists of a specific <span>proleptic-Gregorian
   4643  date</span> with no time-zone information, consisting of a year, a month, and a day. <a
   4644  href="#refsGREGORIAN">[GREGORIAN]</a></p>
   4645 
   4646  <p>A string is a <dfn>valid date string</dfn> representing a year <var data-x="">year</var>, month
   4647  <var data-x="">month</var>, and day <var data-x="">day</var> if it consists of the following
   4648  components in the given order:</p>
   4649 
   4650  <ol>
   4651 
   4652   <li>A <span>valid month string</span>, representing <var data-x="">year</var> and <var
   4653   data-x="">month</var></li>
   4654 
   4655   <li>A U+002D HYPHEN-MINUS character (-)</li>
   4656 
   4657   <li>Two <span>ASCII digits</span>, representing <var data-x="">day</var>, in the range
   4658   1&nbsp;&le;&nbsp;<var data-x="">day</var>&nbsp;&le;&nbsp;<var data-x="">maxday</var> where <var
   4659   data-x="">maxday</var> is the <span data-x="number of days in month month of year year">number of
   4660   days in the month <var data-x="">month</var> and year <var data-x="">year</var></span></li>
   4661 
   4662  </ol>
   4663 
   4664  <div class="nodev">
   4665 
   4666  <p>The rules to <dfn>parse a date string</dfn> are as follows. This will return either a date, or
   4667  nothing. If at any point the algorithm says that it "fails", this means that it is aborted at that
   4668  point and returns nothing.</p>
   4669 
   4670  <ol>
   4671 
   4672   <li><p>Let <var data-x="">input</var> be the string being parsed.</p></li>
   4673 
   4674   <li><p>Let <var data-x="">position</var> be a pointer into <var data-x="">input</var>, initially
   4675   pointing at the start of the string.</p></li>
   4676 
   4677   <li><p><span>Parse a date component</span> to obtain <var data-x="">year</var>, <var
   4678   data-x="">month</var>, and <var data-x="">day</var>. If this returns nothing, then fail.</p>
   4679 
   4680   <li><p>If <var data-x="">position</var> is <em>not</em> beyond the end of <var
   4681   data-x="">input</var>, then fail.</p></li>
   4682 
   4683   <li><p>Let <var data-x="">date</var> be the date with year <var data-x="">year</var>, month <var
   4684   data-x="">month</var>, and day <var data-x="">day</var>.</p></li>
   4685 
   4686   <li><p>Return <var data-x="">date</var>.</p></li>
   4687 
   4688  </ol>
   4689 
   4690  <p>The rules to <dfn>parse a date component</dfn>, given an <var data-x="">input</var> string and a
   4691  <var data-x="">position</var>, are as follows. This will return either a year, a month, and a day,
   4692  or nothing. If at any point the algorithm says that it "fails", this means that it is aborted at
   4693  that point and returns nothing.</p>
   4694 
   4695  <ol>
   4696 
   4697   <li><p><span>Parse a month component</span> to obtain <var data-x="">year</var> and <var
   4698   data-x="">month</var>. If this returns nothing, then fail.</li>
   4699 
   4700   <li><p>Let <var data-x="">maxday</var> be the <span>number of days in month <var
   4701   data-x="">month</var> of year <var data-x="">year</var></span>.</p></li>
   4702 
   4703   <li><p>If <var data-x="">position</var> is beyond the end of <var data-x="">input</var> or if the
   4704   character at <var data-x="">position</var> is not a U+002D HYPHEN-MINUS character, then fail.
   4705   Otherwise, move <var data-x="">position</var> forwards one character.</p></li>
   4706 
   4707   <li><p><span>Collect a sequence of characters</span> that are <span>ASCII digits</span>. If the
   4708   collected sequence is not exactly two characters long, then fail. Otherwise, interpret the
   4709   resulting sequence as a base-ten integer. Let that number be the <var
   4710   data-x="">day</var>.</p></li>
   4711 
   4712   <li><p>If <var data-x="">day</var> is not a number in the range 1&nbsp;&le;&nbsp;<var
   4713   data-x="">day</var>&nbsp;&le;&nbsp;<var data-x="">maxday</var>, then fail.</p></li>
   4714 
   4715   <li><p>Return <var data-x="">year</var>, <var data-x="">month</var>, and <var
   4716   data-x="">day</var>.</p></li>
   4717 
   4718  </ol>
   4719 
   4720  </div>
   4721 
   4722 
   4723  <h5>Yearless dates</h5>
   4724 
   4725  <p>A <dfn data-x="concept-yearless-date">yearless date</dfn> consists of a Gregorian month and a
   4726  day within that month, but with no associated year. <a href="#refsGREGORIAN">[GREGORIAN]</a></p>
   4727 
   4728  <p>A string is a <dfn>valid yearless date string</dfn> representing a month <var
   4729  data-x="">month</var> and a day <var data-x="">day</var> if it consists of the following components
   4730  in the given order:</p>
   4731 
   4732  <ol>
   4733 
   4734   <li>Optionally, two U+002D HYPHEN-MINUS characters (-)</li>
   4735 
   4736   <li>Two <span>ASCII digits</span>, representing the month <var data-x="">month</var>, in the range
   4737   1&nbsp;&le;&nbsp;<var data-x="">month</var>&nbsp;&le;&nbsp;12</li>
   4738 
   4739   <li>A U+002D HYPHEN-MINUS character (-)</li>
   4740 
   4741   <li>Two <span>ASCII digits</span>, representing <var data-x="">day</var>, in the range
   4742   1&nbsp;&le;&nbsp;<var data-x="">day</var>&nbsp;&le;&nbsp;<var data-x="">maxday</var> where <var
   4743   data-x="">maxday</var> is the <span data-x="number of days in month month of year year">number of
   4744   days</span> in the month <var data-x="">month</var> and any arbitrary leap year (e.g. 4 or
   4745   2000)</li>
   4746 
   4747  </ol>
   4748 
   4749  <p class="note">In other words, if the <var data-x="">month</var> is "<code data-x="">02</code>",
   4750  meaning February, then the day can be 29, as if the year was a leap year.</p>
   4751 
   4752  <div class="nodev">
   4753 
   4754  <p>The rules to <dfn>parse a yearless date string</dfn> are as follows. This will return either a
   4755  month and a day, or nothing. If at any point the algorithm says that it "fails", this means that
   4756  it is aborted at that point and returns nothing.</p>
   4757 
   4758  <ol>
   4759 
   4760   <li><p>Let <var data-x="">input</var> be the string being parsed.</p></li>
   4761 
   4762   <li><p>Let <var data-x="">position</var> be a pointer into <var data-x="">input</var>, initially
   4763   pointing at the start of the string.</p></li>
   4764 
   4765   <li><p><span>Parse a yearless date component</span> to obtain <var data-x="">month</var> and <var
   4766   data-x="">day</var>. If this returns nothing, then fail.</p></li>
   4767 
   4768   <li><p>If <var data-x="">position</var> is <em>not</em> beyond the end of <var
   4769   data-x="">input</var>, then fail.</p></li>
   4770 
   4771   <li><p>Return <var data-x="">month</var> and <var data-x="">day</var>.</p></li>
   4772 
   4773  </ol>
   4774 
   4775  <p>The rules to <dfn>parse a yearless date component</dfn>, given an <var data-x="">input</var>
   4776  string and a <var data-x="">position</var>, are as follows. This will return either a month and a
   4777  day, or nothing. If at any point the algorithm says that it "fails", this means that it is aborted
   4778  at that point and returns nothing.</p>
   4779 
   4780  <ol>
   4781 
   4782   <li><p><span>Collect a sequence of characters</span> that are U+002D HYPHEN-MINUS characters (-).
   4783   If the collected sequence is not exactly zero or two characters long, then fail.</p></li>
   4784 
   4785   <li><p><span>Collect a sequence of characters</span> that are <span>ASCII digits</span>. If the
   4786   collected sequence is not exactly two characters long, then fail. Otherwise, interpret the
   4787   resulting sequence as a base-ten integer. Let that number be the <var
   4788   data-x="">month</var>.</p></li>
   4789 
   4790   <li><p>If <var data-x="">month</var> is not a number in the range 1&nbsp;&le;&nbsp;<var
   4791   data-x="">month</var>&nbsp;&le;&nbsp;12, then fail.</p></li>
   4792 
   4793   <li><p>Let <var data-x="">maxday</var> be the <span data-x="number of days in month month of year
   4794   year">number of days</span> in month <var data-x="">month</var> of any arbitrary leap year (e.g. 4
   4795   or 2000).</p></li>
   4796 
   4797   <li><p>If <var data-x="">position</var> is beyond the end of <var data-x="">input</var> or if the
   4798   character at <var data-x="">position</var> is not a U+002D HYPHEN-MINUS character, then fail.
   4799   Otherwise, move <var data-x="">position</var> forwards one character.</p></li>
   4800 
   4801   <li><p><span>Collect a sequence of characters</span> that are <span>ASCII digits</span>. If the
   4802   collected sequence is not exactly two characters long, then fail. Otherwise, interpret the
   4803   resulting sequence as a base-ten integer. Let that number be the <var
   4804   data-x="">day</var>.</p></li>
   4805 
   4806   <li><p>If <var data-x="">day</var> is not a number in the range 1&nbsp;&le;&nbsp;<var
   4807   data-x="">day</var>&nbsp;&le;&nbsp;<var data-x="">maxday</var>, then fail.</p></li>
   4808 
   4809   <li><p>Return <var data-x="">month</var> and <var data-x="">day</var>.</p></li>
   4810 
   4811  </ol>
   4812 
   4813  </div>
   4814 
   4815 
   4816  <h5>Times</h5>
   4817 
   4818  <p>A <dfn data-x="concept-time">time</dfn> consists of a specific time with no time-zone
   4819  information, consisting of an hour, a minute, a second, and a fraction of a second.</p>
   4820 
   4821  <p>A string is a <dfn>valid time string</dfn> representing an hour <var data-x="">hour</var>, a
   4822  minute <var data-x="">minute</var>, and a second <var data-x="">second</var> if it consists of the
   4823  following components in the given order:</p>
   4824 
   4825  <ol>
   4826 
   4827   <li>Two <span>ASCII digits</span>, representing <var data-x="">hour</var>, in the range
   4828   0&nbsp;&le;&nbsp;<var data-x="">hour</var>&nbsp;&le;&nbsp;23</li>
   4829 
   4830   <li>A U+003A COLON character (:)</li>
   4831 
   4832   <li>Two <span>ASCII digits</span>, representing <var data-x="">minute</var>, in the range
   4833   0&nbsp;&le;&nbsp;<var data-x="">minute</var>&nbsp;&le;&nbsp;59</li>
   4834 
   4835   <li>If <var data-x="">second</var> is non-zero, or optionally if <var data-x="">second</var> is
   4836   zero:
   4837 
   4838    <ol>
   4839 
   4840     <li>A U+003A COLON character (:)</li>
   4841 
   4842     <li>Two <span>ASCII digits</span>, representing the integer part <var data-x="">s</var> of <var
   4843     data-x="">second</var>, in the range 0&nbsp;&le;&nbsp;<var data-x="">s</var>&nbsp;&le;&nbsp;59</li>
   4844 
   4845     <li>If <var data-x="">second</var> is not an integer, or optionally if <var
   4846     data-x="">second</var> is an integer:
   4847 
   4848      <ol>
   4849 
   4850       <li>A U+002E FULL STOP character (.)</li>
   4851 
   4852       <li>One, two, or three <span>ASCII digits</span>, representing the fractional part of <var
   4853       data-x="">second</var></li>
   4854 
   4855      </ol>
   4856 
   4857     </li>
   4858 
   4859    </ol>
   4860 
   4861   </li>
   4862 
   4863  </ol>
   4864 
   4865  <p class="note">The <var data-x="">second</var> component cannot be 60 or 61; leap seconds cannot
   4866  be represented.</p>
   4867 
   4868  <div class="nodev">
   4869 
   4870  <p>The rules to <dfn>parse a time string</dfn> are as follows. This will return either a time, or
   4871  nothing. If at any point the algorithm says that it "fails", this means that it is aborted at that
   4872  point and returns nothing.</p>
   4873 
   4874  <ol>
   4875 
   4876   <li><p>Let <var data-x="">input</var> be the string being parsed.</p></li>
   4877 
   4878   <li><p>Let <var data-x="">position</var> be a pointer into <var data-x="">input</var>, initially
   4879   pointing at the start of the string.</p></li>
   4880 
   4881   <li><p><span>Parse a time component</span> to obtain <var data-x="">hour</var>, <var
   4882   data-x="">minute</var>, and <var data-x="">second</var>. If this returns nothing, then fail.</p></li>
   4883 
   4884   <li><p>If <var data-x="">position</var> is <em>not</em> beyond the end of <var
   4885   data-x="">input</var>, then fail.</p></li>
   4886 
   4887   <li><p>Let <var data-x="">time</var> be the time with hour <var data-x="">hour</var>, minute <var
   4888   data-x="">minute</var>, and second <var data-x="">second</var>.</p></li>
   4889 
   4890   <li><p>Return <var data-x="">time</var>.</p></li>
   4891 
   4892  </ol>
   4893 
   4894  <p>The rules to <dfn>parse a time component</dfn>, given an <var data-x="">input</var> string and a
   4895  <var data-x="">position</var>, are as follows. This will return either an hour, a minute, and a
   4896  second, or nothing. If at any point the algorithm says that it "fails", this means that it is
   4897  aborted at that point and returns nothing.</p>
   4898 
   4899  <ol>
   4900 
   4901   <li><p><span>Collect a sequence of characters</span> that are <span>ASCII digits</span>. If the
   4902   collected sequence is not exactly two characters long, then fail. Otherwise, interpret the
   4903   resulting sequence as a base-ten integer. Let that number be the <var
   4904   data-x="">hour</var>.</p></li>
   4905 
   4906   <li><p>If <var data-x="">hour</var> is not a number in the range 0&nbsp;&le;&nbsp;<var
   4907   data-x="">hour</var>&nbsp;&le;&nbsp;23, then fail.</p></li>
   4908 
   4909   <li><p>If <var data-x="">position</var> is beyond the end of <var data-x="">input</var> or if the
   4910   character at <var data-x="">position</var> is not a U+003A COLON character, then fail. Otherwise,
   4911   move <var data-x="">position</var> forwards one character.</p></li>
   4912 
   4913   <li><p><span>Collect a sequence of characters</span> that are <span>ASCII digits</span>. If the
   4914   collected sequence is not exactly two characters long, then fail. Otherwise, interpret the
   4915   resulting sequence as a base-ten integer. Let that number be the <var
   4916   data-x="">minute</var>.</p></li>
   4917 
   4918   <li><p>If <var data-x="">minute</var> is not a number in the range 0&nbsp;&le;&nbsp;<var
   4919   data-x="">minute</var>&nbsp;&le;&nbsp;59, then fail.</p></li>
   4920 
   4921   <li><p>Let <var data-x="">second</var> be a string with the value "0".</p></li>
   4922 
   4923   <li>
   4924 
   4925    <p>If <var data-x="">position</var> is not beyond the end of <var data-x="">input</var> and the
   4926    character at <var data-x="">position</var> is a U+003A COLON character, then run these substeps:</p>
   4927 
   4928    <ol>
   4929 
   4930     <li><p>Advance <var data-x="">position</var> to the next character in <var
   4931     data-x="">input</var>.</p></li>
   4932 
   4933     <li><p>If <var data-x="">position</var> is beyond the end of <var data-x="">input</var>, or at
   4934     the last character in <var data-x="">input</var>, or if the next <em>two</em> characters in <var
   4935     data-x="">input</var> starting at <var data-x="">position</var> are not both <span>ASCII
   4936     digits</span>, then fail.</p></li>
   4937 
   4938     <li><p><span>Collect a sequence of characters</span> that are either <span>ASCII digits</span>
   4939     or U+002E FULL STOP characters. If the collected sequence is three characters long, or if it is
   4940     longer than three characters long and the third character is not a U+002E FULL STOP character,
   4941     or if it has more than one U+002E FULL STOP character, then fail. Otherwise, let the collected
   4942     string be <var data-x="">second</var> instead of its previous value.</p></li>
   4943 
   4944    </ol>
   4945 
   4946   </li>
   4947 
   4948   <li><p>Interpret <var data-x="">second</var> as a base-ten number (possibly with a fractional
   4949   part). Let <var data-x="">second</var> be that number instead of the string version.</p></li>
   4950 
   4951   <li><p>If <var data-x="">second</var> is not a number in the range 0&nbsp;&le;&nbsp;<var
   4952   data-x="">second</var>&nbsp;&lt;&nbsp;60, then fail.</p></li>
   4953 
   4954   <li><p>Return <var data-x="">hour</var>, <var data-x="">minute</var>, and <var
   4955   data-x="">second</var>.</p></li>
   4956 
   4957  </ol>
   4958 
   4959  </div>
   4960 
   4961 
   4962  <h5>Local dates and times</h5>
   4963 
   4964  <p>A <dfn data-x="concept-datetime-local">local date and time</dfn> consists of a specific
   4965  <span>proleptic-Gregorian date</span>, consisting of a year, a month, and a day, and a time,
   4966  consisting of an hour, a minute, a second, and a fraction of a second, but expressed without a
   4967  time zone. <a href="#refsGREGORIAN">[GREGORIAN]</a></p>
   4968 
   4969  <p>A string is a <dfn>valid local date and time string</dfn> representing a date and time if it
   4970  consists of the following components in the given order:</p>
   4971 
   4972  <ol>
   4973 
   4974   <li>A <span>valid date string</span> representing the date</li>
   4975 
   4976   <li>A U+0054 LATIN CAPITAL LETTER T character (T) or a U+0020 SPACE character</li>
   4977 
   4978   <li>A <span>valid time string</span> representing the time</li>
   4979 
   4980  </ol>
   4981 
   4982  <p>A string is a <dfn>valid normalised local date and time string</dfn> representing a date and
   4983  time if it consists of the following components in the given order:</p>
   4984 
   4985  <ol>
   4986 
   4987   <li>A <span>valid date string</span> representing the date</li>
   4988 
   4989   <li>A U+0054 LATIN CAPITAL LETTER T character (T)</li>
   4990 
   4991   <li>A <span>valid time string</span> representing the time, expressed as the shortest possible
   4992   string for the given time (e.g. omitting the seconds component entirely if the given time is zero
   4993   seconds past the minute)</li>
   4994 
   4995  </ol>
   4996 
   4997  <div class="nodev">
   4998 
   4999  <p>The rules to <dfn>parse a local date and time string</dfn> are as follows. This will return
   5000  either a date and time, or nothing. If at any point the algorithm says that it "fails", this means